From 133903502e319d36510dd692586a0074ef49b65c Mon Sep 17 00:00:00 2001 From: zverevgeny Date: Sun, 16 Jun 2024 13:16:04 +0300 Subject: [PATCH] Merge stable-24-1-16-analytics into stable-24-1 (#5596) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Vitalii Gridnev Co-authored-by: Andrey Neporada Co-authored-by: niksaveliev Co-authored-by: Sergey Veselov Co-authored-by: alexnick88 Co-authored-by: Ilnaz Nizametdinov Co-authored-by: Iuliia Sidorina Co-authored-by: kungurtsev Co-authored-by: Nikolay Shestakov Co-authored-by: azevaykin <145343289+azevaykin@users.noreply.github.com> Co-authored-by: DimasKovas <34828390+DimasKovas@users.noreply.github.com> Co-authored-by: ijon Co-authored-by: vporyadke Co-authored-by: kruall Co-authored-by: ivanmorozov333 Co-authored-by: Alexander Rutkovsky Co-authored-by: ildar-khisambeev Co-authored-by: nsofya Co-authored-by: nsofya Co-authored-by: Sofya Novozhilova Co-authored-by: Олег <150132506+iddqdex@users.noreply.github.com> Co-authored-by: Ivan Morozov Co-authored-by: Andrei Rykov Co-authored-by: qyryq Co-authored-by: Daniil Cherednik Co-authored-by: Aleksei Borzenkov --- .github/config/muted_test.txt | 2 - .github/config/muted_ya.txt | 11 +- ydb/core/base/blobstorage.h | 4 +- .../formats/arrow/arrow_batch_builder.cpp | 18 +- ydb/core/formats/arrow/arrow_batch_builder.h | 10 +- ydb/core/formats/arrow/arrow_filter.cpp | 30 +- ydb/core/formats/arrow/arrow_filter.h | 7 + ydb/core/formats/arrow/arrow_helpers.cpp | 195 +- ydb/core/formats/arrow/arrow_helpers.h | 36 +- ydb/core/formats/arrow/common/accessor.cpp | 99 + ydb/core/formats/arrow/common/accessor.h | 199 + ydb/core/formats/arrow/common/adapter.cpp | 5 + ydb/core/formats/arrow/common/adapter.h | 96 + ydb/core/formats/arrow/common/container.cpp | 116 + ydb/core/formats/arrow/common/container.h | 61 + ydb/core/formats/arrow/common/ya.make | 5 + ydb/core/formats/arrow/converter.cpp | 45 +- ydb/core/formats/arrow/converter.h 
| 8 +- ydb/core/formats/arrow/dictionary/object.h | 6 +- ydb/core/formats/arrow/hash/calcer.cpp | 47 - ydb/core/formats/arrow/hash/calcer.h | 72 +- ydb/core/formats/arrow/hash/ya.make | 1 + .../arrow/merging_sorted_input_stream.cpp | 303 -- .../arrow/merging_sorted_input_stream.h | 54 - .../formats/arrow/one_batch_input_stream.h | 36 - ydb/core/formats/arrow/permutations.cpp | 56 +- ydb/core/formats/arrow/permutations.h | 42 +- ydb/core/formats/arrow/program.cpp | 113 +- ydb/core/formats/arrow/program.h | 31 +- .../formats/arrow/reader/batch_iterator.cpp | 51 + .../formats/arrow/reader/batch_iterator.h | 89 + ydb/core/formats/arrow/reader/heap.cpp | 5 + ydb/core/formats/arrow/reader/heap.h | 126 + .../arrow/reader/merger.cpp} | 102 +- ydb/core/formats/arrow/reader/merger.h | 103 + .../{read_filter_merger.cpp => position.cpp} | 31 +- .../{read_filter_merger.h => position.h} | 65 +- .../formats/arrow/reader/result_builder.cpp | 71 + .../formats/arrow/reader/result_builder.h | 38 + ydb/core/formats/arrow/reader/ya.make | 8 +- ydb/core/formats/arrow/replace_key.h | 112 +- ydb/core/formats/arrow/serializer/abstract.h | 68 +- ydb/core/formats/arrow/serializer/native.h | 20 + ydb/core/formats/arrow/size_calcer.cpp | 26 + ydb/core/formats/arrow/size_calcer.h | 2 + ydb/core/formats/arrow/sort_cursor.h | 262 -- ydb/core/formats/arrow/special_keys.cpp | 19 +- ydb/core/formats/arrow/special_keys.h | 3 + ydb/core/formats/arrow/switch/compare.cpp | 5 + ydb/core/formats/arrow/switch/compare.h | 115 + ydb/core/formats/arrow/switch/switch_type.h | 4 +- ydb/core/formats/arrow/switch/ya.make | 1 + ydb/core/formats/arrow/transformer/abstract.h | 10 + .../formats/arrow/transformer/dictionary.h | 20 + ydb/core/formats/arrow/{ => ut}/ut_arrow.cpp | 193 +- .../arrow/{ => ut}/ut_program_step.cpp | 12 +- ydb/core/formats/arrow/ut/ut_size_calcer.cpp | 6 + ydb/core/formats/arrow/ya.make | 4 - ydb/core/grpc_services/rpc_load_rows.cpp | 16 +- ydb/core/io_formats/arrow/csv_arrow.cpp | 44 +- 
ydb/core/io_formats/arrow/csv_arrow.h | 9 +- ydb/core/io_formats/arrow/csv_arrow_ut.cpp | 37 +- .../kqp/compute_actor/kqp_compute_events.h | 7 +- .../compute_actor/kqp_scan_compute_manager.h | 11 - ydb/core/kqp/compute_actor/kqp_scan_events.h | 6 +- .../compute_actor/kqp_scan_fetcher_actor.cpp | 11 +- .../tablestore/operations/add_column.cpp | 7 + .../tablestore/operations/add_column.h | 1 + .../tablestore/operations/alter_column.cpp | 7 + .../tablestore/operations/alter_column.h | 1 + .../tablestore/operations/drop_stat.cpp | 21 + .../tablestore/operations/drop_stat.h | 19 + .../tablestore/operations/upsert_index.cpp | 7 + .../tablestore/operations/upsert_index.h | 1 + .../tablestore/operations/upsert_opt.cpp | 20 + .../tablestore/operations/upsert_opt.h | 22 + .../tablestore/operations/upsert_stat.cpp | 49 + .../tablestore/operations/upsert_stat.h | 23 + .../behaviour/tablestore/operations/ya.make | 4 + ydb/core/kqp/runtime/kqp_scan_data.cpp | 104 +- ydb/core/kqp/runtime/kqp_scan_data.h | 19 +- ydb/core/kqp/runtime/kqp_write_actor.cpp | 2 +- ydb/core/kqp/ut/common/columnshard.cpp | 63 +- ydb/core/kqp/ut/common/columnshard.h | 7 +- ydb/core/kqp/ut/common/kqp_ut_common.cpp | 12 + ydb/core/kqp/ut/common/kqp_ut_common.h | 1 + ydb/core/kqp/ut/olap/aggregations_ut.cpp | 1354 +++++++ ydb/core/kqp/ut/olap/blobs_sharing_ut.cpp | 266 ++ ydb/core/kqp/ut/olap/clickbench_ut.cpp | 247 ++ ydb/core/kqp/ut/olap/helpers/aggregation.cpp | 140 + ydb/core/kqp/ut/olap/helpers/aggregation.h | 245 ++ ydb/core/kqp/ut/olap/helpers/get_value.cpp | 112 + ydb/core/kqp/ut/olap/helpers/get_value.h | 15 + ydb/core/kqp/ut/olap/helpers/local.cpp | 22 + ydb/core/kqp/ut/olap/helpers/local.h | 41 + .../kqp/ut/olap/helpers/query_executor.cpp | 73 + ydb/core/kqp/ut/olap/helpers/query_executor.h | 12 + ydb/core/kqp/ut/olap/helpers/typed_local.cpp | 161 + ydb/core/kqp/ut/olap/helpers/typed_local.h | 90 + ydb/core/kqp/ut/olap/helpers/writer.cpp | 16 + ydb/core/kqp/ut/olap/helpers/writer.h | 8 + 
ydb/core/kqp/ut/olap/helpers/ya.make | 18 + ydb/core/kqp/ut/olap/indexes_ut.cpp | 367 ++ ydb/core/kqp/ut/olap/kqp_olap_ut.cpp | 3296 +---------------- ydb/core/kqp/ut/olap/statistics_ut.cpp | 74 + ydb/core/kqp/ut/olap/sys_view_ut.cpp | 722 ++++ ydb/core/kqp/ut/olap/write_ut.cpp | 111 + ydb/core/kqp/ut/olap/ya.make | 10 +- ydb/core/kqp/ut/scheme/kqp_scheme_ut.cpp | 143 +- ydb/core/kqp/ut/scheme/ya.make | 1 + ydb/core/protos/config.proto | 10 +- ydb/core/protos/counters_columnshard.proto | 14 +- ydb/core/protos/flat_scheme_op.proto | 28 +- ydb/core/protos/tx_columnshard.proto | 11 + ydb/core/protos/ya.make | 1 + ydb/core/sys_view/common/schema.cpp | 4 + ydb/core/sys_view/common/schema.h | 87 + ydb/core/testlib/common_helper.cpp | 4 +- ydb/core/testlib/common_helper.h | 20 +- ydb/core/testlib/cs_helper.cpp | 10 +- ydb/core/testlib/cs_helper.h | 1 + .../tx/columnshard/background_controller.cpp | 37 +- .../tx/columnshard/background_controller.h | 75 +- ydb/core/tx/columnshard/blob.cpp | 186 - ydb/core/tx/columnshard/blob.h | 447 +-- ydb/core/tx/columnshard/blob_cache.cpp | 13 +- ydb/core/tx/columnshard/blob_cache.h | 4 +- .../blobs_action/abstract/action.cpp | 12 - .../blobs_action/abstract/action.h | 42 +- .../blobs_action/abstract/blob_set.cpp | 57 + .../blobs_action/abstract/blob_set.h | 512 +++ .../blobs_action/abstract/common.h | 6 +- .../columnshard/blobs_action/abstract/gc.cpp | 18 +- .../tx/columnshard/blobs_action/abstract/gc.h | 31 +- .../blobs_action/abstract/gc_actor.cpp | 5 + .../blobs_action/abstract/gc_actor.h | 72 + .../blobs_action/abstract/read.cpp | 86 +- .../columnshard/blobs_action/abstract/read.h | 226 +- .../blobs_action/abstract/remove.cpp | 12 +- .../blobs_action/abstract/remove.h | 31 +- .../blobs_action/abstract/storage.h | 50 +- .../abstract/storages_manager.cpp | 78 +- .../blobs_action/abstract/storages_manager.h | 53 +- .../blobs_action/abstract/write.cpp | 15 +- .../columnshard/blobs_action/abstract/write.h | 61 +- 
.../columnshard/blobs_action/abstract/ya.make | 8 +- .../blobs_action/blob_manager_db.cpp | 144 +- .../blobs_action/blob_manager_db.h | 69 +- .../{ => blobs_action/bs}/blob_manager.cpp | 281 +- .../{ => blobs_action/bs}/blob_manager.h | 82 +- .../tx/columnshard/blobs_action/bs/gc.cpp | 105 +- ydb/core/tx/columnshard/blobs_action/bs/gc.h | 35 +- .../columnshard/blobs_action/bs/gc_actor.cpp | 12 +- .../tx/columnshard/blobs_action/bs/gc_actor.h | 26 +- .../tx/columnshard/blobs_action/bs/read.cpp | 9 + .../tx/columnshard/blobs_action/bs/read.h | 12 +- .../tx/columnshard/blobs_action/bs/remove.h | 23 +- .../columnshard/blobs_action/bs/storage.cpp | 25 +- .../tx/columnshard/blobs_action/bs/storage.h | 20 +- .../tx/columnshard/blobs_action/bs/write.cpp | 2 +- .../tx/columnshard/blobs_action/bs/write.h | 13 +- .../tx/columnshard/blobs_action/bs/ya.make | 3 + .../columnshard/blobs_action/common/const.cpp | 5 + .../columnshard/blobs_action/common/const.h | 12 + .../columnshard/blobs_action/common/ya.make | 10 + .../blobs_action/counters/storage.cpp | 14 +- .../blobs_action/counters/storage.h | 22 +- .../columnshard/blobs_action/counters/ya.make | 2 + .../blobs_action/events/delete_blobs.cpp | 5 + .../blobs_action/events/delete_blobs.h | 32 + .../columnshard/blobs_action/events/ya.make | 13 + ydb/core/tx/columnshard/blobs_action/memory.h | 173 - .../blobs_action/protos/blobs.proto | 19 + .../blobs_action/protos/events.proto | 14 + .../columnshard/blobs_action/protos/ya.make | 12 + .../blobs_action/storages_manager/manager.cpp | 50 + .../blobs_action/storages_manager/manager.h | 31 + .../blobs_action/storages_manager/ya.make | 22 + .../columnshard/blobs_action/tier/adapter.cpp | 8 +- .../columnshard/blobs_action/tier/adapter.h | 8 + .../tx/columnshard/blobs_action/tier/gc.cpp | 31 +- .../tx/columnshard/blobs_action/tier/gc.h | 19 +- .../blobs_action/tier/gc_actor.cpp | 25 +- .../columnshard/blobs_action/tier/gc_actor.h | 18 +- .../columnshard/blobs_action/tier/gc_info.h | 24 
+- .../tx/columnshard/blobs_action/tier/read.cpp | 20 +- .../tx/columnshard/blobs_action/tier/read.h | 5 +- .../tx/columnshard/blobs_action/tier/remove.h | 21 +- .../columnshard/blobs_action/tier/storage.cpp | 68 +- .../columnshard/blobs_action/tier/storage.h | 25 +- .../columnshard/blobs_action/tier/write.cpp | 11 +- .../tx/columnshard/blobs_action/tier/write.h | 7 +- .../tx/columnshard/blobs_action/tier/ya.make | 2 + .../blobs_action/transaction/tx_draft.cpp | 18 + .../blobs_action/transaction/tx_draft.h | 23 +- .../transaction/tx_gc_indexed.cpp | 4 +- .../transaction/tx_gc_insert_table.cpp | 18 +- .../transaction/tx_remove_blobs.cpp | 22 + .../transaction/tx_remove_blobs.h | 34 + .../blobs_action/transaction/tx_write.cpp | 45 +- .../blobs_action/transaction/tx_write.h | 11 +- .../transaction/tx_write_index.cpp | 41 +- .../blobs_action/transaction/tx_write_index.h | 1 - .../blobs_action/transaction/ya.make | 3 + ydb/core/tx/columnshard/blobs_action/ya.make | 14 +- .../tx/columnshard/blobs_reader/actor.cpp | 9 +- .../blobs_reader/read_coordinator.cpp | 102 +- .../blobs_reader/read_coordinator.h | 47 +- ydb/core/tx/columnshard/blobs_reader/task.cpp | 94 +- ydb/core/tx/columnshard/blobs_reader/task.h | 116 +- ydb/core/tx/columnshard/columnshard.cpp | 34 +- ydb/core/tx/columnshard/columnshard.h | 20 +- ydb/core/tx/columnshard/columnshard__init.cpp | 95 +- .../columnshard/columnshard__progress_tx.cpp | 6 + .../columnshard__propose_transaction.cpp | 96 +- .../tx/columnshard/columnshard__read_base.h | 32 - ydb/core/tx/columnshard/columnshard__scan.cpp | 931 +---- ydb/core/tx/columnshard/columnshard__scan.h | 115 - .../columnshard/columnshard__stats_scan.cpp | 122 - .../tx/columnshard/columnshard__stats_scan.h | 88 - .../tx/columnshard/columnshard__write.cpp | 143 +- .../columnshard/columnshard__write_index.cpp | 5 +- .../tx/columnshard/columnshard_common.cpp | 82 - ydb/core/tx/columnshard/columnshard_common.h | 11 +- ydb/core/tx/columnshard/columnshard_impl.cpp | 548 
++- ydb/core/tx/columnshard/columnshard_impl.h | 178 +- .../columnshard/columnshard_private_events.h | 12 +- ydb/core/tx/columnshard/columnshard_schema.h | 234 +- ydb/core/tx/columnshard/columnshard_ttl.h | 7 +- .../tx/columnshard/columnshard_ut_common.cpp | 83 +- .../tx/columnshard/columnshard_ut_common.h | 208 +- ydb/core/tx/columnshard/common/blob.cpp | 173 + ydb/core/tx/columnshard/common/blob.h | 410 ++ .../common/protos/blob_range.proto | 18 + ydb/core/tx/columnshard/common/protos/ya.make | 1 + ydb/core/tx/columnshard/common/tablet_id.cpp | 12 + ydb/core/tx/columnshard/common/tablet_id.h | 8 + .../columnshard/common/tests/shard_reader.h | 5 +- ydb/core/tx/columnshard/common/ya.make | 2 + ydb/core/tx/columnshard/config.clang-format | 41 + .../tx/columnshard/counters/blobs_manager.cpp | 2 +- .../tx/columnshard/counters/blobs_manager.h | 11 +- .../tx/columnshard/counters/columnshard.cpp | 20 +- .../tx/columnshard/counters/columnshard.h | 13 + .../columnshard/counters/common/histogram.cpp | 83 + .../columnshard/counters/common/histogram.h | 118 + .../tx/columnshard/counters/common/ya.make | 1 + .../tx/columnshard/counters/engine_logs.cpp | 61 +- .../tx/columnshard/counters/engine_logs.h | 219 +- .../tx/columnshard/counters/indexation.cpp | 1 + ydb/core/tx/columnshard/counters/indexation.h | 1 + ydb/core/tx/columnshard/counters/scan.cpp | 40 +- ydb/core/tx/columnshard/counters/scan.h | 100 +- ydb/core/tx/columnshard/counters/ya.make | 1 + .../columnshard/data_locks/locks/abstract.cpp | 5 + .../columnshard/data_locks/locks/abstract.h | 52 + .../data_locks/locks/composite.cpp | 5 + .../columnshard/data_locks/locks/composite.h | 54 + .../tx/columnshard/data_locks/locks/list.cpp | 5 + .../tx/columnshard/data_locks/locks/list.h | 96 + .../columnshard/data_locks/locks/snapshot.cpp | 5 + .../columnshard/data_locks/locks/snapshot.h | 36 + .../tx/columnshard/data_locks/locks/ya.make | 14 + .../data_locks/manager/manager.cpp | 48 + 
.../columnshard/data_locks/manager/manager.h | 46 + .../tx/columnshard/data_locks/manager/ya.make | 11 + ydb/core/tx/columnshard/data_locks/ya.make | 8 + .../data_sharing/common/context/context.cpp | 62 + .../data_sharing/common/context/context.h | 29 + .../data_sharing/common/context/ya.make | 13 + .../data_sharing/common/session/common.cpp | 66 + .../data_sharing/common/session/common.h | 110 + .../data_sharing/common/session/ya.make | 14 + .../common/transactions/tx_extension.cpp | 5 + .../common/transactions/tx_extension.h | 35 + .../data_sharing/common/transactions/ya.make | 17 + .../columnshard/data_sharing/common/ya.make | 9 + .../destination/events/control.cpp | 14 + .../data_sharing/destination/events/control.h | 33 + .../destination/events/status.cpp | 5 + .../data_sharing/destination/events/status.h | 16 + .../destination/events/transfer.cpp | 68 + .../destination/events/transfer.h | 107 + .../data_sharing/destination/events/ya.make | 16 + .../destination/session/destination.cpp | 187 + .../destination/session/destination.h | 134 + .../data_sharing/destination/session/ya.make | 14 + .../transactions/tx_data_from_source.cpp | 44 + .../transactions/tx_data_from_source.h | 28 + .../tx_finish_ack_from_initiator.cpp | 16 + .../tx_finish_ack_from_initiator.h | 26 + .../transactions/tx_finish_from_source.cpp | 17 + .../transactions/tx_finish_from_source.h | 28 + .../transactions/tx_start_from_initiator.cpp | 33 + .../transactions/tx_start_from_initiator.h | 44 + .../destination/transactions/ya.make | 16 + .../data_sharing/destination/ya.make | 9 + .../initiator/controller/abstract.cpp | 4 + .../initiator/controller/abstract.h | 75 + .../initiator/controller/test.cpp | 4 + .../data_sharing/initiator/controller/test.h | 37 + .../data_sharing/initiator/controller/ya.make | 13 + .../initiator/status/abstract.cpp | 5 + .../data_sharing/initiator/status/abstract.h | 70 + .../data_sharing/initiator/status/ya.make | 14 + .../data_sharing/initiator/ya.make | 8 + 
.../data_sharing/manager/sessions.cpp | 114 + .../data_sharing/manager/sessions.h | 54 + .../data_sharing/manager/shared_blobs.cpp | 127 + .../data_sharing/manager/shared_blobs.h | 247 ++ .../columnshard/data_sharing/manager/ya.make | 13 + .../modification/events/change_owning.cpp | 13 + .../modification/events/change_owning.h | 30 + .../data_sharing/modification/events/ya.make | 14 + .../modification/tasks/modification.cpp | 43 + .../modification/tasks/modification.h | 364 ++ .../data_sharing/modification/tasks/ya.make | 14 + .../transactions/tx_change_blobs_owning.cpp | 22 + .../transactions/tx_change_blobs_owning.h | 34 + .../modification/transactions/ya.make | 17 + .../data_sharing/modification/ya.make | 9 + .../data_sharing/protos/data.proto | 41 + .../data_sharing/protos/events.proto | 67 + .../data_sharing/protos/initiator.proto | 38 + .../data_sharing/protos/links.proto | 23 + .../data_sharing/protos/sessions.proto | 59 + .../data_sharing/protos/transfer.proto | 19 + .../columnshard/data_sharing/protos/ya.make | 19 + .../data_sharing/source/events/control.cpp | 10 + .../data_sharing/source/events/control.h | 18 + .../data_sharing/source/events/transfer.cpp | 5 + .../data_sharing/source/events/transfer.h | 25 + .../data_sharing/source/events/ya.make | 12 + .../data_sharing/source/session/cursor.cpp | 181 + .../data_sharing/source/session/cursor.h | 108 + .../data_sharing/source/session/source.cpp | 110 + .../data_sharing/source/session/source.h | 91 + .../data_sharing/source/session/ya.make | 14 + .../transactions/tx_data_ack_to_source.cpp | 40 + .../transactions/tx_data_ack_to_source.h | 28 + .../transactions/tx_finish_ack_to_source.cpp | 15 + .../transactions/tx_finish_ack_to_source.h | 27 + .../transactions/tx_start_to_source.cpp | 20 + .../source/transactions/tx_start_to_source.h | 28 + .../transactions/tx_write_source_cursor.cpp | 17 + .../transactions/tx_write_source_cursor.h | 27 + .../data_sharing/source/transactions/ya.make | 14 + 
.../columnshard/data_sharing/source/ya.make | 9 + ydb/core/tx/columnshard/data_sharing/ya.make | 13 + ydb/core/tx/columnshard/defs.h | 1 + .../engines/changes/abstract/abstract.cpp | 64 +- .../engines/changes/abstract/abstract.h | 153 +- .../engines/changes/abstract/settings.h | 7 - .../actualization/construction/context.cpp | 73 + .../actualization/construction/context.h | 62 + .../actualization/construction/ya.make | 11 + .../actualization/controller/controller.cpp | 15 + .../actualization/controller/controller.h | 31 + .../changes/actualization/controller/ya.make | 11 + .../engines/changes/actualization/ya.make | 8 + .../columnshard/engines/changes/cleanup.cpp | 62 - .../engines/changes/cleanup_portions.cpp | 63 + .../changes/{cleanup.h => cleanup_portions.h} | 28 +- .../engines/changes/cleanup_tables.cpp | 42 + .../engines/changes/cleanup_tables.h | 59 + .../engines/changes/compaction.cpp | 42 +- .../columnshard/engines/changes/compaction.h | 13 +- .../compaction/column_portion_chunk.cpp | 26 +- .../changes/compaction/column_portion_chunk.h | 101 +- .../changes/compaction/merge_context.h | 4 +- .../changes/compaction/merged_column.cpp | 2 +- .../engines/changes/general_compaction.cpp | 87 +- .../engines/changes/general_compaction.h | 14 +- .../engines/changes/indexation.cpp | 67 +- .../columnshard/engines/changes/indexation.h | 23 +- .../tx/columnshard/engines/changes/ttl.cpp | 68 +- ydb/core/tx/columnshard/engines/changes/ttl.h | 43 +- .../engines/changes/with_appended.cpp | 142 +- .../engines/changes/with_appended.h | 36 +- .../tx/columnshard/engines/changes/ya.make | 4 +- .../tx/columnshard/engines/column_engine.cpp | 22 +- .../tx/columnshard/engines/column_engine.h | 107 +- .../engines/column_engine_logs.cpp | 495 ++- .../columnshard/engines/column_engine_logs.h | 163 +- .../tx/columnshard/engines/db_wrapper.cpp | 15 +- ydb/core/tx/columnshard/engines/db_wrapper.h | 3 +- ydb/core/tx/columnshard/engines/defs.h | 8 - ydb/core/tx/columnshard/engines/filter.cpp 
| 20 +- ydb/core/tx/columnshard/engines/filter.h | 3 - .../columnshard/engines/insert_table/data.h | 13 +- .../engines/insert_table/insert_table.cpp | 41 +- .../engines/insert_table/insert_table.h | 23 +- .../columnshard/engines/insert_table/meta.h | 4 + .../engines/insert_table/path_info.cpp | 4 + .../engines/insert_table/path_info.h | 1 + .../engines/insert_table/rt_insertion.cpp | 16 + .../engines/insert_table/rt_insertion.h | 2 + .../engines/portions/column_record.cpp | 76 +- .../engines/portions/column_record.h | 112 +- .../tx/columnshard/engines/portions/common.h | 8 +- .../engines/portions/index_chunk.cpp | 39 + .../engines/portions/index_chunk.h | 73 + .../tx/columnshard/engines/portions/meta.cpp | 25 +- .../tx/columnshard/engines/portions/meta.h | 16 + .../engines/portions/portion_info.cpp | 617 ++- .../engines/portions/portion_info.h | 510 ++- .../engines/portions/with_blobs.cpp | 206 +- .../columnshard/engines/portions/with_blobs.h | 55 +- .../tx/columnshard/engines/portions/ya.make | 2 + .../engines/reader/abstract/abstract.cpp | 9 + .../engines/reader/abstract/abstract.h | 45 + .../reader/abstract/constructor.cpp} | 70 +- .../engines/reader/abstract/constructor.h | 37 + .../engines/reader/abstract/read_context.cpp | 9 + .../reader/{ => abstract}/read_context.h | 56 +- .../engines/reader/abstract/read_metadata.cpp | 31 + .../engines/reader/abstract/read_metadata.h | 151 + .../engines/reader/abstract/ya.make | 20 + .../engines/reader/actor/actor.cpp | 415 +++ .../columnshard/engines/reader/actor/actor.h | 184 + .../columnshard/engines/reader/actor/ya.make | 13 + .../engines/reader/common/conveyor_task.cpp | 9 + .../reader/{ => common}/conveyor_task.h | 10 +- .../reader/{ => common}/description.cpp | 0 .../engines/reader/{ => common}/description.h | 6 +- .../engines/reader/{ => common}/queue.cpp | 0 .../engines/reader/{ => common}/queue.h | 0 .../engines/reader/common/result.cpp | 51 + .../engines/reader/common/result.h | 87 + 
.../engines/reader/common/stats.cpp | 28 + .../columnshard/engines/reader/common/stats.h | 41 + .../columnshard/engines/reader/common/ya.make | 16 + .../engines/reader/conveyor_task.cpp | 10 - .../reader/plain_reader/column_assembler.cpp | 41 - .../reader/plain_reader/column_assembler.h | 38 - .../plain_reader/constructor/constructor.cpp | 42 + .../plain_reader/constructor/constructor.h | 17 + .../constructor/read_metadata.cpp | 57 + .../plain_reader/constructor/read_metadata.h | 87 + .../plain_reader/constructor/resolver.cpp | 5 + .../plain_reader/constructor/resolver.h | 32 + .../reader/plain_reader/constructor/ya.make | 13 + .../engines/reader/plain_reader/context.cpp | 177 - .../engines/reader/plain_reader/context.h | 51 - .../engines/reader/plain_reader/fetching.cpp | 98 - .../engines/reader/plain_reader/fetching.h | 220 -- .../engines/reader/plain_reader/interval.cpp | 200 - .../engines/reader/plain_reader/interval.h | 116 - .../{ => iterator}/columns_set.cpp | 6 +- .../plain_reader/{ => iterator}/columns_set.h | 17 +- .../{ => iterator}/constructor.cpp | 12 +- .../plain_reader/{ => iterator}/constructor.h | 14 +- .../reader/plain_reader/iterator/context.cpp | 235 ++ .../reader/plain_reader/iterator/context.h | 61 + .../{ => iterator}/fetched_data.cpp | 7 +- .../{ => iterator}/fetched_data.h | 53 +- .../reader/plain_reader/iterator/fetching.cpp | 141 + .../reader/plain_reader/iterator/fetching.h | 294 ++ .../reader/plain_reader/iterator/interval.cpp | 94 + .../reader/plain_reader/iterator/interval.h | 92 + .../plain_reader/iterator/iterator.cpp} | 28 +- .../reader/plain_reader/iterator/iterator.h} | 66 +- .../reader/plain_reader/iterator/merge.cpp | 157 + .../reader/plain_reader/iterator/merge.h | 110 + .../{ => iterator}/plain_read_data.cpp | 14 +- .../{ => iterator}/plain_read_data.h | 26 +- .../reader/plain_reader/iterator/scanner.cpp | 302 ++ .../reader/plain_reader/iterator/scanner.h | 123 + .../plain_reader/{ => iterator}/source.cpp | 132 +- 
.../plain_reader/{ => iterator}/source.h | 198 +- .../reader/plain_reader/iterator/ya.make | 23 + .../engines/reader/plain_reader/scanner.cpp | 143 - .../engines/reader/plain_reader/scanner.h | 76 - .../engines/reader/plain_reader/ya.make | 14 +- .../engines/reader/read_context.cpp | 16 - .../engines/reader/read_filter_merger.h | 423 --- .../engines/reader/read_metadata.cpp | 112 - .../engines/reader/read_metadata.h | 303 -- .../reader/sys_view/abstract/granule_view.cpp | 5 + .../reader/sys_view/abstract/granule_view.h | 43 + .../reader/sys_view/abstract/iterator.cpp | 5 + .../reader/sys_view/abstract/iterator.h | 155 + .../reader/sys_view/abstract/metadata.cpp | 5 + .../reader/sys_view/abstract/metadata.h | 26 + .../engines/reader/sys_view/abstract/ya.make | 14 + .../engines/reader/sys_view/chunks/chunks.cpp | 110 + .../engines/reader/sys_view/chunks/chunks.h | 39 + .../engines/reader/sys_view/chunks/ya.make | 12 + .../sys_view/constructor/constructor.cpp | 5 + .../reader/sys_view/constructor/constructor.h | 85 + .../reader/sys_view/constructor/ya.make | 11 + .../reader/sys_view/granules/granules.cpp | 33 + .../reader/sys_view/granules/granules.h | 42 + .../engines/reader/sys_view/granules/ya.make | 12 + .../reader/sys_view/portions/portions.cpp | 58 + .../reader/sys_view/portions/portions.h | 39 + .../engines/reader/sys_view/portions/ya.make | 12 + .../engines/reader/sys_view/ya.make | 15 + .../engines/reader/transaction/tx_scan.cpp | 274 ++ .../engines/reader/transaction/tx_scan.h | 28 + .../engines/reader/transaction/ya.make | 15 + .../tx/columnshard/engines/reader/ya.make | 14 +- .../engines/scheme/abstract/index_info.cpp | 20 + .../engines/scheme/abstract/index_info.h | 39 + .../engines/scheme/abstract/loader.cpp | 60 + .../engines/scheme/abstract/loader.h | 49 + .../engines/scheme/abstract/saver.cpp | 31 + .../engines/scheme/abstract/saver.h | 34 + .../engines/scheme/abstract/ya.make | 17 + .../engines/scheme/abstract_scheme.cpp | 118 - 
.../engines/scheme/abstract_scheme.h | 73 +- .../engines/scheme/column/info.cpp | 90 + .../columnshard/engines/scheme/column/info.h | 61 + .../columnshard/engines/scheme/column/ya.make | 18 + .../engines/scheme/column_features.cpp | 65 - .../engines/scheme/column_features.h | 168 +- .../engines/scheme/filtered_scheme.cpp | 93 - .../engines/scheme/filtered_scheme.h | 32 +- .../columnshard/engines/scheme/index_info.cpp | 180 +- .../columnshard/engines/scheme/index_info.h | 149 +- .../engines/scheme/indexes/abstract/meta.cpp | 20 +- .../engines/scheme/indexes/abstract/meta.h | 24 +- .../engines/scheme/snapshot_scheme.cpp | 52 - .../engines/scheme/snapshot_scheme.h | 38 +- .../scheme/statistics/abstract/common.cpp | 40 + .../scheme/statistics/abstract/common.h | 24 + .../statistics/abstract/constructor.cpp | 5 + .../scheme/statistics/abstract/constructor.h | 73 + .../scheme/statistics/abstract/operator.cpp | 12 + .../scheme/statistics/abstract/operator.h | 124 + .../statistics/abstract/portion_storage.cpp | 119 + .../statistics/abstract/portion_storage.h | 53 + .../scheme/statistics/abstract/ya.make | 20 + .../scheme/statistics/max/constructor.cpp | 45 + .../scheme/statistics/max/constructor.h | 33 + .../scheme/statistics/max/operator.cpp | 41 + .../engines/scheme/statistics/max/operator.h | 64 + .../engines/scheme/statistics/max/ya.make | 15 + .../scheme/statistics/protos/data.proto | 66 + .../engines/scheme/statistics/protos/ya.make | 11 + .../statistics/variability/constructor.cpp | 45 + .../statistics/variability/constructor.h | 33 + .../statistics/variability/operator.cpp | 164 + .../scheme/statistics/variability/operator.h | 67 + .../scheme/statistics/variability/ya.make | 15 + .../engines/scheme/statistics/ya.make | 10 + .../columnshard/engines/scheme/tier_info.cpp | 20 - .../tx/columnshard/engines/scheme/tier_info.h | 167 +- .../engines/scheme/tiering/common.cpp | 5 + .../engines/scheme/tiering/common.h | 9 + .../engines/scheme/tiering/tier_info.cpp | 42 + 
.../engines/scheme/tiering/tier_info.h | 215 ++ .../engines/scheme/tiering/ya.make | 12 + .../scheme/versions/abstract_scheme.cpp | 141 + .../engines/scheme/versions/abstract_scheme.h | 57 + .../scheme/versions/filtered_scheme.cpp | 94 + .../engines/scheme/versions/filtered_scheme.h | 32 + .../scheme/versions/snapshot_scheme.cpp | 53 + .../engines/scheme/versions/snapshot_scheme.h | 38 + .../scheme/versions/versioned_index.cpp | 31 + .../engines/scheme/versions/versioned_index.h | 68 + .../engines/scheme/versions/ya.make | 14 + .../tx/columnshard/engines/scheme/ya.make | 7 + .../storage/actualizer/abstract/abstract.cpp | 5 + .../storage/actualizer/abstract/abstract.h | 30 + .../storage/actualizer/abstract/context.cpp | 5 + .../storage/actualizer/abstract/context.h | 59 + .../storage/actualizer/abstract/ya.make | 12 + .../storage/actualizer/common/address.cpp | 33 + .../storage/actualizer/common/address.h | 34 + .../engines/storage/actualizer/common/ya.make | 11 + .../storage/actualizer/counters/counters.cpp | 5 + .../storage/actualizer/counters/counters.h | 109 + .../storage/actualizer/counters/ya.make | 13 + .../storage/actualizer/index/index.cpp | 54 + .../engines/storage/actualizer/index/index.h | 37 + .../engines/storage/actualizer/index/ya.make | 11 + .../storage/actualizer/scheme/counters.cpp | 5 + .../storage/actualizer/scheme/counters.h | 47 + .../storage/actualizer/scheme/scheme.cpp | 117 + .../storage/actualizer/scheme/scheme.h | 73 + .../engines/storage/actualizer/scheme/ya.make | 12 + .../storage/actualizer/tiering/counters.cpp | 5 + .../storage/actualizer/tiering/counters.h | 78 + .../storage/actualizer/tiering/tiering.cpp | 184 + .../storage/actualizer/tiering/tiering.h | 142 + .../storage/actualizer/tiering/ya.make | 12 + .../engines/storage/actualizer/ya.make | 12 + .../engines/storage/chunks/column.cpp | 18 + .../engines/storage/chunks/column.h | 68 + .../engines/storage/chunks/data.cpp | 11 + .../columnshard/engines/storage/chunks/data.h | 48 + 
.../engines/storage/chunks/null_column.cpp | 5 + .../engines/storage/chunks/null_column.h | 51 + .../engines/storage/chunks/ya.make | 17 + .../columnshard/engines/storage/granule.cpp | 71 +- .../tx/columnshard/engines/storage/granule.h | 143 +- .../storage/optimizer/abstract/optimizer.cpp | 11 +- .../storage/optimizer/abstract/optimizer.h | 18 +- .../storage/optimizer/intervals/blob_size.cpp | 43 - .../storage/optimizer/intervals/blob_size.h | 145 - .../storage/optimizer/intervals/counters.cpp | 5 - .../storage/optimizer/intervals/counters.h | 98 - .../storage/optimizer/intervals/optimizer.cpp | 212 -- .../storage/optimizer/intervals/optimizer.h | 256 -- .../storage/optimizer/intervals/ya.make | 16 - .../storage/optimizer/lbuckets/counters.cpp | 2 +- .../storage/optimizer/lbuckets/counters.h | 4 +- .../storage/optimizer/lbuckets/optimizer.h | 181 +- .../storage/optimizer/levels/counters.cpp | 5 - .../storage/optimizer/levels/counters.h | 107 - .../storage/optimizer/levels/optimizer.cpp | 5 - .../storage/optimizer/levels/optimizer.h | 523 --- .../engines/storage/optimizer/levels/ya.make | 15 - .../engines/storage/optimizer/ya.make | 2 - .../columnshard/engines/storage/storage.cpp | 55 +- .../tx/columnshard/engines/storage/storage.h | 140 +- .../tx/columnshard/engines/storage/ya.make | 2 + .../engines/ut/ut_insert_table.cpp | 4 +- .../columnshard/engines/ut/ut_logs_engine.cpp | 231 +- .../tx/columnshard/engines/ut/ut_program.cpp | 49 +- ydb/core/tx/columnshard/engines/ut/ya.make | 1 + .../engines/writer/blob_constructor.cpp | 12 + .../engines/writer/blob_constructor.h | 15 +- .../engines/writer/buffer/actor.cpp | 1 + .../columnshard/engines/writer/buffer/ya.make | 2 + .../writer/compacted_blob_constructor.cpp | 18 +- .../writer/compacted_blob_constructor.h | 4 + .../engines/writer/indexed_blob_constructor.h | 7 +- .../engines/writer/write_controller.cpp | 20 + .../engines/writer/write_controller.h | 40 +- ydb/core/tx/columnshard/engines/ya.make | 1 + 
.../columnshard/export/actor/export_actor.cpp | 36 + .../columnshard/export/actor/export_actor.h | 118 + .../tx/columnshard/export/actor/write.cpp | 26 + ydb/core/tx/columnshard/export/actor/write.h | 17 + ydb/core/tx/columnshard/export/actor/ya.make | 16 + .../columnshard/export/common/identifier.cpp | 49 + .../tx/columnshard/export/common/identifier.h | 48 + ydb/core/tx/columnshard/export/common/ya.make | 13 + .../tx/columnshard/export/events/events.cpp | 5 + .../tx/columnshard/export/events/events.h | 38 + ydb/core/tx/columnshard/export/events/ya.make | 13 + .../tx/columnshard/export/manager/manager.cpp | 67 + .../tx/columnshard/export/manager/manager.h | 58 + .../tx/columnshard/export/manager/ya.make | 14 + .../tx/columnshard/export/protos/cursor.proto | 7 + .../columnshard/export/protos/selector.proto | 17 + .../columnshard/export/protos/storage.proto | 21 + .../tx/columnshard/export/protos/task.proto | 16 + ydb/core/tx/columnshard/export/protos/ya.make | 15 + .../tx/columnshard/export/session/cursor.cpp | 36 + .../tx/columnshard/export/session/cursor.h | 55 + .../session/selector/abstract/selector.cpp | 16 + .../session/selector/abstract/selector.h | 62 + .../export/session/selector/abstract/ya.make | 14 + .../session/selector/backup/selector.cpp | 30 + .../export/session/selector/backup/selector.h | 80 + .../export/session/selector/backup/ya.make | 13 + .../export/session/selector/ya.make | 11 + .../tx/columnshard/export/session/session.cpp | 65 + .../tx/columnshard/export/session/session.h | 98 + .../session/storage/abstract/storage.cpp | 15 + .../export/session/storage/abstract/storage.h | 63 + .../export/session/storage/abstract/ya.make | 15 + .../export/session/storage/s3/storage.cpp | 25 + .../export/session/storage/s3/storage.h | 44 + .../export/session/storage/s3/ya.make | 23 + .../export/session/storage/tier/storage.cpp | 14 + .../export/session/storage/tier/storage.h | 40 + .../export/session/storage/tier/ya.make | 11 + 
.../export/session/storage/ya.make | 12 + .../tx/columnshard/export/session/task.cpp | 48 + ydb/core/tx/columnshard/export/session/task.h | 49 + .../tx/columnshard/export/session/ya.make | 20 + .../export/transactions/tx_save_cursor.cpp | 23 + .../export/transactions/tx_save_cursor.h | 26 + .../columnshard/export/transactions/ya.make | 14 + ydb/core/tx/columnshard/export/ya.make | 16 + .../tx/columnshard/hooks/abstract/abstract.h | 139 +- .../columnshard/hooks/testing/controller.cpp | 156 +- .../tx/columnshard/hooks/testing/controller.h | 254 +- .../columnshard/inflight_request_tracker.cpp | 90 + .../tx/columnshard/inflight_request_tracker.h | 89 +- .../normalizer/abstract/abstract.cpp | 39 +- .../normalizer/abstract/abstract.h | 125 +- .../columnshard/normalizer/abstract/ya.make | 2 + .../normalizer/granule/normalizer.cpp | 23 +- .../normalizer/granule/normalizer.h | 14 +- .../tx/columnshard/normalizer/granule/ya.make | 2 +- .../columnshard/normalizer/portion/chunks.cpp | 20 +- .../columnshard/normalizer/portion/chunks.h | 14 +- .../columnshard/normalizer/portion/clean.cpp | 91 + .../tx/columnshard/normalizer/portion/clean.h | 39 + .../normalizer/portion/min_max.cpp | 221 -- .../columnshard/normalizer/portion/min_max.h | 35 - .../normalizer/portion/normalizer.cpp | 108 +- .../normalizer/portion/normalizer.h | 33 +- .../normalizer/portion/portion.cpp | 75 + .../columnshard/normalizer/portion/portion.h | 39 + .../tx/columnshard/normalizer/portion/ya.make | 4 +- .../normalizer/tables/normalizer.cpp | 141 + .../normalizer/tables/normalizer.h | 23 + .../tx/columnshard/normalizer/tables/ya.make | 11 + ydb/core/tx/columnshard/operations/write.cpp | 180 +- ydb/core/tx/columnshard/operations/write.h | 26 +- .../tx/columnshard/operations/write_data.cpp | 2 +- .../tx/columnshard/operations/write_data.h | 2 +- ydb/core/tx/columnshard/operations/ya.make | 1 + .../columnshard/resource_subscriber/actor.cpp | 52 +- .../columnshard/resource_subscriber/actor.h | 1 + 
.../columnshard/resource_subscriber/task.cpp | 16 +- .../tx/columnshard/resource_subscriber/task.h | 1 + .../splitter/abstract/chunk_meta.cpp | 27 + .../splitter/abstract/chunk_meta.h | 44 + .../abstract/chunks.cpp} | 2 +- .../tx/columnshard/splitter/abstract/chunks.h | 125 + .../tx/columnshard/splitter/abstract/ya.make | 13 + .../tx/columnshard/splitter/batch_slice.cpp | 175 +- .../tx/columnshard/splitter/batch_slice.h | 85 +- ydb/core/tx/columnshard/splitter/blob_info.h | 7 + .../tx/columnshard/splitter/chunk_meta.cpp | 26 - ydb/core/tx/columnshard/splitter/chunk_meta.h | 53 +- ydb/core/tx/columnshard/splitter/chunks.cpp | 5 +- ydb/core/tx/columnshard/splitter/chunks.h | 90 +- .../tx/columnshard/splitter/rb_splitter.cpp | 71 - .../tx/columnshard/splitter/rb_splitter.h | 69 - .../tx/columnshard/splitter/scheme_info.cpp | 8 + .../tx/columnshard/splitter/scheme_info.h | 6 +- ydb/core/tx/columnshard/splitter/settings.cpp | 2 +- ydb/core/tx/columnshard/splitter/settings.h | 101 +- .../columnshard/splitter/similar_packer.cpp | 5 + .../tx/columnshard/splitter/similar_packer.h | 81 + ydb/core/tx/columnshard/splitter/simple.h | 4 + .../columnshard/splitter/ut/ut_splitter.cpp | 193 +- ydb/core/tx/columnshard/splitter/ut/ya.make | 4 + ydb/core/tx/columnshard/splitter/ya.make | 5 +- ydb/core/tx/columnshard/tables_manager.cpp | 160 +- ydb/core/tx/columnshard/tables_manager.h | 123 +- .../columnshard/test_helper/controllers.cpp | 22 + .../tx/columnshard/test_helper/controllers.h | 57 + .../tx/columnshard/test_helper/helper.cpp | 91 + ydb/core/tx/columnshard/test_helper/helper.h | 75 + ydb/core/tx/columnshard/test_helper/ya.make | 20 + .../transactions/operators/backup.cpp | 67 + .../transactions/operators/backup.h | 29 + .../transactions/operators/ev_write.h | 15 +- .../transactions/operators/schema.h | 74 +- .../transactions/operators/ya.make | 3 + .../transactions/propose_transaction_base.cpp | 37 + .../transactions/propose_transaction_base.h | 22 + 
.../transactions/tx_controller.cpp | 58 +- .../columnshard/transactions/tx_controller.h | 45 +- ydb/core/tx/columnshard/transactions/ya.make | 2 + ydb/core/tx/columnshard/ut_rw/ut_backup.cpp | 112 + .../ut_rw/ut_columnshard_read_write.cpp | 289 +- .../tx/columnshard/ut_rw/ut_normalizer.cpp | 128 +- ydb/core/tx/columnshard/ut_rw/ya.make | 3 + .../ut_schema/ut_columnshard_schema.cpp | 315 +- ydb/core/tx/columnshard/ut_schema/ya.make | 1 + ydb/core/tx/columnshard/write_actor.cpp | 6 +- ydb/core/tx/columnshard/ya.make | 11 +- .../tx/data_events/columnshard_splitter.h | 2 +- ydb/core/tx/data_events/events.h | 22 +- ydb/core/tx/data_events/payload_helper.h | 28 +- ydb/core/tx/datashard/datashard__kqp_scan.cpp | 8 +- .../datashard/datashard_ut_read_iterator.cpp | 2 +- ydb/core/tx/datashard/datashard_ut_write.cpp | 2 +- .../datashard/datashard_write_operation.cpp | 4 +- .../ut_common/datashard_ut_common.cpp | 4 +- ydb/core/tx/datashard/write_unit.cpp | 2 +- ydb/core/tx/program/program.h | 11 +- ydb/core/tx/schemeshard/common/validation.cpp | 30 + ydb/core/tx/schemeshard/common/validation.h | 13 + ydb/core/tx/schemeshard/common/ya.make | 12 + .../tx/schemeshard/olap/columns/update.cpp | 77 +- ydb/core/tx/schemeshard/olap/columns/update.h | 8 +- .../tx/schemeshard/olap/indexes/schema.cpp | 5 + ydb/core/tx/schemeshard/olap/indexes/schema.h | 1 + .../tx/schemeshard/olap/indexes/update.cpp | 11 +- ydb/core/tx/schemeshard/olap/indexes/update.h | 3 +- .../tx/schemeshard/olap/options/schema.cpp | 22 + ydb/core/tx/schemeshard/olap/options/schema.h | 18 + .../tx/schemeshard/olap/options/update.cpp | 5 + ydb/core/tx/schemeshard/olap/options/update.h | 22 + ydb/core/tx/schemeshard/olap/options/ya.make | 12 + .../tx/schemeshard/olap/schema/schema.cpp | 243 +- ydb/core/tx/schemeshard/olap/schema/schema.h | 11 +- .../tx/schemeshard/olap/schema/update.cpp | 8 + ydb/core/tx/schemeshard/olap/schema/update.h | 7 + ydb/core/tx/schemeshard/olap/schema/ya.make | 2 + 
.../tx/schemeshard/olap/statistics/schema.cpp | 92 + .../tx/schemeshard/olap/statistics/schema.h | 80 + .../tx/schemeshard/olap/statistics/update.cpp | 35 + .../tx/schemeshard/olap/statistics/update.h | 43 + .../tx/schemeshard/olap/statistics/ya.make | 12 + ydb/core/tx/schemeshard/olap/ya.make | 2 + .../tx/schemeshard/schemeshard_info_types.cpp | 2 +- .../schemeshard/schemeshard_validate_ttl.cpp | 128 +- .../tx/schemeshard/ut_helpers/helpers.cpp | 2 +- ydb/core/tx/schemeshard/ut_olap/ut_olap.cpp | 10 +- ydb/core/tx/schemeshard/ut_olap/ya.make | 2 + ydb/core/tx/schemeshard/ya.make | 1 + ydb/core/tx/tiering/abstract/manager.cpp | 12 + ydb/core/tx/tiering/abstract/manager.h | 18 + ydb/core/tx/tiering/abstract/ya.make | 11 + ydb/core/tx/tiering/manager.cpp | 16 +- ydb/core/tx/tiering/manager.h | 24 +- ydb/core/tx/tiering/rule/manager.cpp | 3 + ydb/core/tx/tiering/rule/object.cpp | 4 +- ydb/core/tx/tiering/tier/manager.cpp | 3 + ydb/core/tx/tiering/ut/ut_tiers.cpp | 21 +- .../tx/tx_proxy/upload_rows_common_impl.cpp | 16 +- .../tx/tx_proxy/upload_rows_common_impl.h | 54 +- ydb/core/wrappers/fake_storage.h | 41 +- ydb/core/wrappers/s3_storage_config.cpp | 39 +- ydb/library/actors/core/actor_bootstrapped.h | 2 +- ydb/library/actors/prof/tag.cpp | 12 +- ydb/library/actors/prof/tag.h | 2 +- ydb/library/conclusion/result.h | 10 +- ydb/library/conclusion/status.cpp | 5 + ydb/library/conclusion/status.h | 22 +- ydb/library/conclusion/ya.make | 1 + ydb/library/services/services.proto | 1 + .../ydb_cli/commands/ydb_service_scheme.cpp | 3 + .../lib/ydb_cli/commands/ydb_service_scheme.h | 1 + .../lib/ydb_cli/common/recursive_remove.cpp | 32 +- .../lib/ydb_cli/common/scheme_printers.cpp | 49 +- .../lib/ydb_cli/common/scheme_printers.h | 4 +- ydb/services/bg_tasks/abstract/interface.h | 2 +- ydb/services/metadata/common/ya.make | 2 +- ydb/services/metadata/manager/abstract.h | 2 +- ydb/services/metadata/secret/secret.h | 4 +- ydb/services/ydb/ydb_logstore_ut.cpp | 14 +- 
ydb/services/ydb/ydb_olapstore_ut.cpp | 72 +- .../queries-original-plan-column-0 | 83 + .../queries-original-plan-column-1 | 125 +- .../queries-original-plan-column-10 | 143 +- .../queries-original-plan-column-11 | 147 +- .../queries-original-plan-column-12 | 126 +- .../queries-original-plan-column-13 | 143 +- .../queries-original-plan-column-14 | 130 +- .../queries-original-plan-column-15 | 86 + .../queries-original-plan-column-16 | 90 + .../queries-original-plan-column-17 | 89 + .../queries-original-plan-column-18 | 94 + .../queries-original-plan-column-19 | 113 +- .../queries-original-plan-column-2 | 267 +- .../queries-original-plan-column-20 | 125 +- .../queries-original-plan-column-21 | 173 +- .../queries-original-plan-column-22 | 491 ++- .../queries-original-plan-column-23 | 530 ++- .../queries-original-plan-column-24 | 118 +- .../queries-original-plan-column-25 | 113 +- .../queries-original-plan-column-26 | 117 +- .../queries-original-plan-column-27 | 142 +- .../queries-original-plan-column-28 | 138 +- .../queries-original-plan-column-29 | 163 +- .../queries-original-plan-column-3 | 106 + .../queries-original-plan-column-30 | 139 +- .../queries-original-plan-column-31 | 139 +- .../queries-original-plan-column-32 | 98 + .../queries-original-plan-column-33 | 86 + .../queries-original-plan-column-34 | 90 + .../queries-original-plan-column-35 | 86 + .../queries-original-plan-column-36 | 313 +- .../queries-original-plan-column-37 | 313 +- .../queries-original-plan-column-38 | 338 +- .../queries-original-plan-column-39 | 240 +- .../queries-original-plan-column-4 | 95 + .../queries-original-plan-column-40 | 285 +- .../queries-original-plan-column-41 | 342 +- .../queries-original-plan-column-42 | 281 +- .../queries-original-plan-column-5 | 95 + .../queries-original-plan-column-6 | 211 +- .../queries-original-plan-column-7 | 126 +- .../queries-original-plan-column-8 | 103 + .../queries-original-plan-column-9 | 181 + .../queries-original-plan-row-0 | 66 + 
.../queries-original-plan-row-1 | 80 + .../queries-original-plan-row-10 | 98 + .../queries-original-plan-row-11 | 99 + .../queries-original-plan-row-12 | 84 + .../queries-original-plan-row-13 | 98 + .../queries-original-plan-row-14 | 85 + .../queries-original-plan-row-15 | 72 + .../queries-original-plan-row-16 | 73 + .../queries-original-plan-row-17 | 72 + .../queries-original-plan-row-18 | 74 + .../queries-original-plan-row-19 | 70 + .../queries-original-plan-row-2 | 137 +- .../queries-original-plan-row-20 | 80 + .../queries-original-plan-row-21 | 85 + .../queries-original-plan-row-22 | 159 + .../queries-original-plan-row-23 | 174 + .../queries-original-plan-row-24 | 71 + .../queries-original-plan-row-25 | 71 + .../queries-original-plan-row-26 | 72 + .../queries-original-plan-row-27 | 97 + .../queries-original-plan-row-28 | 96 + .../queries-original-plan-row-29 | 135 +- .../queries-original-plan-row-3 | 68 + .../queries-original-plan-row-30 | 88 + .../queries-original-plan-row-31 | 88 + .../queries-original-plan-row-32 | 75 + .../queries-original-plan-row-33 | 72 + .../queries-original-plan-row-34 | 73 + .../queries-original-plan-row-35 | 72 + .../queries-original-plan-row-36 | 88 + .../queries-original-plan-row-37 | 88 + .../queries-original-plan-row-38 | 113 + .../queries-original-plan-row-39 | 115 + .../queries-original-plan-row-4 | 81 + .../queries-original-plan-row-40 | 113 + .../queries-original-plan-row-41 | 114 + .../queries-original-plan-row-42 | 112 + .../queries-original-plan-row-5 | 81 + .../queries-original-plan-row-6 | 135 +- .../queries-original-plan-row-7 | 84 + .../queries-original-plan-row-8 | 86 + .../queries-original-plan-row-9 | 135 + 921 files changed, 44977 insertions(+), 17007 deletions(-) create mode 100644 ydb/core/formats/arrow/common/accessor.cpp create mode 100644 ydb/core/formats/arrow/common/accessor.h create mode 100644 ydb/core/formats/arrow/common/adapter.cpp create mode 100644 ydb/core/formats/arrow/common/adapter.h create mode 
100644 ydb/core/formats/arrow/common/container.cpp create mode 100644 ydb/core/formats/arrow/common/container.h delete mode 100644 ydb/core/formats/arrow/merging_sorted_input_stream.cpp delete mode 100644 ydb/core/formats/arrow/merging_sorted_input_stream.h delete mode 100644 ydb/core/formats/arrow/one_batch_input_stream.h create mode 100644 ydb/core/formats/arrow/reader/batch_iterator.cpp create mode 100644 ydb/core/formats/arrow/reader/batch_iterator.h create mode 100644 ydb/core/formats/arrow/reader/heap.cpp create mode 100644 ydb/core/formats/arrow/reader/heap.h rename ydb/core/{tx/columnshard/engines/reader/read_filter_merger.cpp => formats/arrow/reader/merger.cpp} (71%) create mode 100644 ydb/core/formats/arrow/reader/merger.h rename ydb/core/formats/arrow/reader/{read_filter_merger.cpp => position.cpp} (75%) rename ydb/core/formats/arrow/reader/{read_filter_merger.h => position.h} (83%) create mode 100644 ydb/core/formats/arrow/reader/result_builder.cpp create mode 100644 ydb/core/formats/arrow/reader/result_builder.h delete mode 100644 ydb/core/formats/arrow/sort_cursor.h create mode 100644 ydb/core/formats/arrow/switch/compare.cpp create mode 100644 ydb/core/formats/arrow/switch/compare.h rename ydb/core/formats/arrow/{ => ut}/ut_arrow.cpp (79%) rename ydb/core/formats/arrow/{ => ut}/ut_program_step.cpp (99%) create mode 100644 ydb/core/kqp/gateway/behaviour/tablestore/operations/drop_stat.cpp create mode 100644 ydb/core/kqp/gateway/behaviour/tablestore/operations/drop_stat.h create mode 100644 ydb/core/kqp/gateway/behaviour/tablestore/operations/upsert_opt.cpp create mode 100644 ydb/core/kqp/gateway/behaviour/tablestore/operations/upsert_opt.h create mode 100644 ydb/core/kqp/gateway/behaviour/tablestore/operations/upsert_stat.cpp create mode 100644 ydb/core/kqp/gateway/behaviour/tablestore/operations/upsert_stat.h create mode 100644 ydb/core/kqp/ut/olap/aggregations_ut.cpp create mode 100644 ydb/core/kqp/ut/olap/blobs_sharing_ut.cpp create mode 100644 
ydb/core/kqp/ut/olap/clickbench_ut.cpp create mode 100644 ydb/core/kqp/ut/olap/helpers/aggregation.cpp create mode 100644 ydb/core/kqp/ut/olap/helpers/aggregation.h create mode 100644 ydb/core/kqp/ut/olap/helpers/get_value.cpp create mode 100644 ydb/core/kqp/ut/olap/helpers/get_value.h create mode 100644 ydb/core/kqp/ut/olap/helpers/local.cpp create mode 100644 ydb/core/kqp/ut/olap/helpers/local.h create mode 100644 ydb/core/kqp/ut/olap/helpers/query_executor.cpp create mode 100644 ydb/core/kqp/ut/olap/helpers/query_executor.h create mode 100644 ydb/core/kqp/ut/olap/helpers/typed_local.cpp create mode 100644 ydb/core/kqp/ut/olap/helpers/typed_local.h create mode 100644 ydb/core/kqp/ut/olap/helpers/writer.cpp create mode 100644 ydb/core/kqp/ut/olap/helpers/writer.h create mode 100644 ydb/core/kqp/ut/olap/helpers/ya.make create mode 100644 ydb/core/kqp/ut/olap/indexes_ut.cpp create mode 100644 ydb/core/kqp/ut/olap/statistics_ut.cpp create mode 100644 ydb/core/kqp/ut/olap/sys_view_ut.cpp create mode 100644 ydb/core/kqp/ut/olap/write_ut.cpp create mode 100644 ydb/core/tx/columnshard/blobs_action/abstract/blob_set.cpp create mode 100644 ydb/core/tx/columnshard/blobs_action/abstract/blob_set.h create mode 100644 ydb/core/tx/columnshard/blobs_action/abstract/gc_actor.cpp create mode 100644 ydb/core/tx/columnshard/blobs_action/abstract/gc_actor.h rename ydb/core/tx/columnshard/{ => blobs_action/bs}/blob_manager.cpp (52%) rename ydb/core/tx/columnshard/{ => blobs_action/bs}/blob_manager.h (68%) create mode 100644 ydb/core/tx/columnshard/blobs_action/common/const.cpp create mode 100644 ydb/core/tx/columnshard/blobs_action/common/const.h create mode 100644 ydb/core/tx/columnshard/blobs_action/common/ya.make create mode 100644 ydb/core/tx/columnshard/blobs_action/events/delete_blobs.cpp create mode 100644 ydb/core/tx/columnshard/blobs_action/events/delete_blobs.h create mode 100644 ydb/core/tx/columnshard/blobs_action/events/ya.make delete mode 100644 
ydb/core/tx/columnshard/blobs_action/memory.h create mode 100644 ydb/core/tx/columnshard/blobs_action/protos/blobs.proto create mode 100644 ydb/core/tx/columnshard/blobs_action/protos/events.proto create mode 100644 ydb/core/tx/columnshard/blobs_action/protos/ya.make create mode 100644 ydb/core/tx/columnshard/blobs_action/storages_manager/manager.cpp create mode 100644 ydb/core/tx/columnshard/blobs_action/storages_manager/manager.h create mode 100644 ydb/core/tx/columnshard/blobs_action/storages_manager/ya.make create mode 100644 ydb/core/tx/columnshard/blobs_action/transaction/tx_remove_blobs.cpp create mode 100644 ydb/core/tx/columnshard/blobs_action/transaction/tx_remove_blobs.h delete mode 100644 ydb/core/tx/columnshard/columnshard__read_base.h delete mode 100644 ydb/core/tx/columnshard/columnshard__stats_scan.cpp delete mode 100644 ydb/core/tx/columnshard/columnshard__stats_scan.h create mode 100644 ydb/core/tx/columnshard/common/blob.cpp create mode 100644 ydb/core/tx/columnshard/common/blob.h create mode 100644 ydb/core/tx/columnshard/common/protos/blob_range.proto create mode 100644 ydb/core/tx/columnshard/common/tablet_id.cpp create mode 100644 ydb/core/tx/columnshard/common/tablet_id.h create mode 100644 ydb/core/tx/columnshard/config.clang-format create mode 100644 ydb/core/tx/columnshard/counters/common/histogram.cpp create mode 100644 ydb/core/tx/columnshard/counters/common/histogram.h create mode 100644 ydb/core/tx/columnshard/data_locks/locks/abstract.cpp create mode 100644 ydb/core/tx/columnshard/data_locks/locks/abstract.h create mode 100644 ydb/core/tx/columnshard/data_locks/locks/composite.cpp create mode 100644 ydb/core/tx/columnshard/data_locks/locks/composite.h create mode 100644 ydb/core/tx/columnshard/data_locks/locks/list.cpp create mode 100644 ydb/core/tx/columnshard/data_locks/locks/list.h create mode 100644 ydb/core/tx/columnshard/data_locks/locks/snapshot.cpp create mode 100644 ydb/core/tx/columnshard/data_locks/locks/snapshot.h create 
mode 100644 ydb/core/tx/columnshard/data_locks/locks/ya.make create mode 100644 ydb/core/tx/columnshard/data_locks/manager/manager.cpp create mode 100644 ydb/core/tx/columnshard/data_locks/manager/manager.h create mode 100644 ydb/core/tx/columnshard/data_locks/manager/ya.make create mode 100644 ydb/core/tx/columnshard/data_locks/ya.make create mode 100644 ydb/core/tx/columnshard/data_sharing/common/context/context.cpp create mode 100644 ydb/core/tx/columnshard/data_sharing/common/context/context.h create mode 100644 ydb/core/tx/columnshard/data_sharing/common/context/ya.make create mode 100644 ydb/core/tx/columnshard/data_sharing/common/session/common.cpp create mode 100644 ydb/core/tx/columnshard/data_sharing/common/session/common.h create mode 100644 ydb/core/tx/columnshard/data_sharing/common/session/ya.make create mode 100644 ydb/core/tx/columnshard/data_sharing/common/transactions/tx_extension.cpp create mode 100644 ydb/core/tx/columnshard/data_sharing/common/transactions/tx_extension.h create mode 100644 ydb/core/tx/columnshard/data_sharing/common/transactions/ya.make create mode 100644 ydb/core/tx/columnshard/data_sharing/common/ya.make create mode 100644 ydb/core/tx/columnshard/data_sharing/destination/events/control.cpp create mode 100644 ydb/core/tx/columnshard/data_sharing/destination/events/control.h create mode 100644 ydb/core/tx/columnshard/data_sharing/destination/events/status.cpp create mode 100644 ydb/core/tx/columnshard/data_sharing/destination/events/status.h create mode 100644 ydb/core/tx/columnshard/data_sharing/destination/events/transfer.cpp create mode 100644 ydb/core/tx/columnshard/data_sharing/destination/events/transfer.h create mode 100644 ydb/core/tx/columnshard/data_sharing/destination/events/ya.make create mode 100644 ydb/core/tx/columnshard/data_sharing/destination/session/destination.cpp create mode 100644 ydb/core/tx/columnshard/data_sharing/destination/session/destination.h create mode 100644 
ydb/core/tx/columnshard/data_sharing/destination/session/ya.make create mode 100644 ydb/core/tx/columnshard/data_sharing/destination/transactions/tx_data_from_source.cpp create mode 100644 ydb/core/tx/columnshard/data_sharing/destination/transactions/tx_data_from_source.h create mode 100644 ydb/core/tx/columnshard/data_sharing/destination/transactions/tx_finish_ack_from_initiator.cpp create mode 100644 ydb/core/tx/columnshard/data_sharing/destination/transactions/tx_finish_ack_from_initiator.h create mode 100644 ydb/core/tx/columnshard/data_sharing/destination/transactions/tx_finish_from_source.cpp create mode 100644 ydb/core/tx/columnshard/data_sharing/destination/transactions/tx_finish_from_source.h create mode 100644 ydb/core/tx/columnshard/data_sharing/destination/transactions/tx_start_from_initiator.cpp create mode 100644 ydb/core/tx/columnshard/data_sharing/destination/transactions/tx_start_from_initiator.h create mode 100644 ydb/core/tx/columnshard/data_sharing/destination/transactions/ya.make create mode 100644 ydb/core/tx/columnshard/data_sharing/destination/ya.make create mode 100644 ydb/core/tx/columnshard/data_sharing/initiator/controller/abstract.cpp create mode 100644 ydb/core/tx/columnshard/data_sharing/initiator/controller/abstract.h create mode 100644 ydb/core/tx/columnshard/data_sharing/initiator/controller/test.cpp create mode 100644 ydb/core/tx/columnshard/data_sharing/initiator/controller/test.h create mode 100644 ydb/core/tx/columnshard/data_sharing/initiator/controller/ya.make create mode 100644 ydb/core/tx/columnshard/data_sharing/initiator/status/abstract.cpp create mode 100644 ydb/core/tx/columnshard/data_sharing/initiator/status/abstract.h create mode 100644 ydb/core/tx/columnshard/data_sharing/initiator/status/ya.make create mode 100644 ydb/core/tx/columnshard/data_sharing/initiator/ya.make create mode 100644 ydb/core/tx/columnshard/data_sharing/manager/sessions.cpp create mode 100644 
ydb/core/tx/columnshard/data_sharing/manager/sessions.h create mode 100644 ydb/core/tx/columnshard/data_sharing/manager/shared_blobs.cpp create mode 100644 ydb/core/tx/columnshard/data_sharing/manager/shared_blobs.h create mode 100644 ydb/core/tx/columnshard/data_sharing/manager/ya.make create mode 100644 ydb/core/tx/columnshard/data_sharing/modification/events/change_owning.cpp create mode 100644 ydb/core/tx/columnshard/data_sharing/modification/events/change_owning.h create mode 100644 ydb/core/tx/columnshard/data_sharing/modification/events/ya.make create mode 100644 ydb/core/tx/columnshard/data_sharing/modification/tasks/modification.cpp create mode 100644 ydb/core/tx/columnshard/data_sharing/modification/tasks/modification.h create mode 100644 ydb/core/tx/columnshard/data_sharing/modification/tasks/ya.make create mode 100644 ydb/core/tx/columnshard/data_sharing/modification/transactions/tx_change_blobs_owning.cpp create mode 100644 ydb/core/tx/columnshard/data_sharing/modification/transactions/tx_change_blobs_owning.h create mode 100644 ydb/core/tx/columnshard/data_sharing/modification/transactions/ya.make create mode 100644 ydb/core/tx/columnshard/data_sharing/modification/ya.make create mode 100644 ydb/core/tx/columnshard/data_sharing/protos/data.proto create mode 100644 ydb/core/tx/columnshard/data_sharing/protos/events.proto create mode 100644 ydb/core/tx/columnshard/data_sharing/protos/initiator.proto create mode 100644 ydb/core/tx/columnshard/data_sharing/protos/links.proto create mode 100644 ydb/core/tx/columnshard/data_sharing/protos/sessions.proto create mode 100644 ydb/core/tx/columnshard/data_sharing/protos/transfer.proto create mode 100644 ydb/core/tx/columnshard/data_sharing/protos/ya.make create mode 100644 ydb/core/tx/columnshard/data_sharing/source/events/control.cpp create mode 100644 ydb/core/tx/columnshard/data_sharing/source/events/control.h create mode 100644 ydb/core/tx/columnshard/data_sharing/source/events/transfer.cpp create mode 
100644 ydb/core/tx/columnshard/data_sharing/source/events/transfer.h create mode 100644 ydb/core/tx/columnshard/data_sharing/source/events/ya.make create mode 100644 ydb/core/tx/columnshard/data_sharing/source/session/cursor.cpp create mode 100644 ydb/core/tx/columnshard/data_sharing/source/session/cursor.h create mode 100644 ydb/core/tx/columnshard/data_sharing/source/session/source.cpp create mode 100644 ydb/core/tx/columnshard/data_sharing/source/session/source.h create mode 100644 ydb/core/tx/columnshard/data_sharing/source/session/ya.make create mode 100644 ydb/core/tx/columnshard/data_sharing/source/transactions/tx_data_ack_to_source.cpp create mode 100644 ydb/core/tx/columnshard/data_sharing/source/transactions/tx_data_ack_to_source.h create mode 100644 ydb/core/tx/columnshard/data_sharing/source/transactions/tx_finish_ack_to_source.cpp create mode 100644 ydb/core/tx/columnshard/data_sharing/source/transactions/tx_finish_ack_to_source.h create mode 100644 ydb/core/tx/columnshard/data_sharing/source/transactions/tx_start_to_source.cpp create mode 100644 ydb/core/tx/columnshard/data_sharing/source/transactions/tx_start_to_source.h create mode 100644 ydb/core/tx/columnshard/data_sharing/source/transactions/tx_write_source_cursor.cpp create mode 100644 ydb/core/tx/columnshard/data_sharing/source/transactions/tx_write_source_cursor.h create mode 100644 ydb/core/tx/columnshard/data_sharing/source/transactions/ya.make create mode 100644 ydb/core/tx/columnshard/data_sharing/source/ya.make create mode 100644 ydb/core/tx/columnshard/data_sharing/ya.make create mode 100644 ydb/core/tx/columnshard/engines/changes/actualization/construction/context.cpp create mode 100644 ydb/core/tx/columnshard/engines/changes/actualization/construction/context.h create mode 100644 ydb/core/tx/columnshard/engines/changes/actualization/construction/ya.make create mode 100644 ydb/core/tx/columnshard/engines/changes/actualization/controller/controller.cpp create mode 100644 
ydb/core/tx/columnshard/engines/changes/actualization/controller/controller.h create mode 100644 ydb/core/tx/columnshard/engines/changes/actualization/controller/ya.make create mode 100644 ydb/core/tx/columnshard/engines/changes/actualization/ya.make delete mode 100644 ydb/core/tx/columnshard/engines/changes/cleanup.cpp create mode 100644 ydb/core/tx/columnshard/engines/changes/cleanup_portions.cpp rename ydb/core/tx/columnshard/engines/changes/{cleanup.h => cleanup_portions.h} (63%) create mode 100644 ydb/core/tx/columnshard/engines/changes/cleanup_tables.cpp create mode 100644 ydb/core/tx/columnshard/engines/changes/cleanup_tables.h create mode 100644 ydb/core/tx/columnshard/engines/portions/index_chunk.cpp create mode 100644 ydb/core/tx/columnshard/engines/portions/index_chunk.h create mode 100644 ydb/core/tx/columnshard/engines/reader/abstract/abstract.cpp create mode 100644 ydb/core/tx/columnshard/engines/reader/abstract/abstract.h rename ydb/core/tx/columnshard/{columnshard__read_base.cpp => engines/reader/abstract/constructor.cpp} (50%) create mode 100644 ydb/core/tx/columnshard/engines/reader/abstract/constructor.h create mode 100644 ydb/core/tx/columnshard/engines/reader/abstract/read_context.cpp rename ydb/core/tx/columnshard/engines/reader/{ => abstract}/read_context.h (80%) create mode 100644 ydb/core/tx/columnshard/engines/reader/abstract/read_metadata.cpp create mode 100644 ydb/core/tx/columnshard/engines/reader/abstract/read_metadata.h create mode 100644 ydb/core/tx/columnshard/engines/reader/abstract/ya.make create mode 100644 ydb/core/tx/columnshard/engines/reader/actor/actor.cpp create mode 100644 ydb/core/tx/columnshard/engines/reader/actor/actor.h create mode 100644 ydb/core/tx/columnshard/engines/reader/actor/ya.make create mode 100644 ydb/core/tx/columnshard/engines/reader/common/conveyor_task.cpp rename ydb/core/tx/columnshard/engines/reader/{ => common}/conveyor_task.h (70%) rename ydb/core/tx/columnshard/engines/reader/{ => 
common}/description.cpp (100%) rename ydb/core/tx/columnshard/engines/reader/{ => common}/description.h (93%) rename ydb/core/tx/columnshard/engines/reader/{ => common}/queue.cpp (100%) rename ydb/core/tx/columnshard/engines/reader/{ => common}/queue.h (100%) create mode 100644 ydb/core/tx/columnshard/engines/reader/common/result.cpp create mode 100644 ydb/core/tx/columnshard/engines/reader/common/result.h create mode 100644 ydb/core/tx/columnshard/engines/reader/common/stats.cpp create mode 100644 ydb/core/tx/columnshard/engines/reader/common/stats.h create mode 100644 ydb/core/tx/columnshard/engines/reader/common/ya.make delete mode 100644 ydb/core/tx/columnshard/engines/reader/conveyor_task.cpp delete mode 100644 ydb/core/tx/columnshard/engines/reader/plain_reader/column_assembler.cpp delete mode 100644 ydb/core/tx/columnshard/engines/reader/plain_reader/column_assembler.h create mode 100644 ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/constructor.cpp create mode 100644 ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/constructor.h create mode 100644 ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/read_metadata.cpp create mode 100644 ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/read_metadata.h create mode 100644 ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/resolver.cpp create mode 100644 ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/resolver.h create mode 100644 ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/ya.make delete mode 100644 ydb/core/tx/columnshard/engines/reader/plain_reader/context.cpp delete mode 100644 ydb/core/tx/columnshard/engines/reader/plain_reader/context.h delete mode 100644 ydb/core/tx/columnshard/engines/reader/plain_reader/fetching.cpp delete mode 100644 ydb/core/tx/columnshard/engines/reader/plain_reader/fetching.h delete mode 100644 ydb/core/tx/columnshard/engines/reader/plain_reader/interval.cpp delete mode 100644 
ydb/core/tx/columnshard/engines/reader/plain_reader/interval.h rename ydb/core/tx/columnshard/engines/reader/plain_reader/{ => iterator}/columns_set.cpp (89%) rename ydb/core/tx/columnshard/engines/reader/plain_reader/{ => iterator}/columns_set.h (88%) rename ydb/core/tx/columnshard/engines/reader/plain_reader/{ => iterator}/constructor.cpp (62%) rename ydb/core/tx/columnshard/engines/reader/plain_reader/{ => iterator}/constructor.h (54%) create mode 100644 ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/context.cpp create mode 100644 ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/context.h rename ydb/core/tx/columnshard/engines/reader/plain_reader/{ => iterator}/fetched_data.cpp (53%) rename ydb/core/tx/columnshard/engines/reader/plain_reader/{ => iterator}/fetched_data.h (62%) create mode 100644 ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/fetching.cpp create mode 100644 ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/fetching.h create mode 100644 ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/interval.cpp create mode 100644 ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/interval.h rename ydb/core/tx/columnshard/{columnshard__index_scan.cpp => engines/reader/plain_reader/iterator/iterator.cpp} (64%) rename ydb/core/tx/columnshard/{columnshard__index_scan.h => engines/reader/plain_reader/iterator/iterator.h} (54%) create mode 100644 ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/merge.cpp create mode 100644 ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/merge.h rename ydb/core/tx/columnshard/engines/reader/plain_reader/{ => iterator}/plain_read_data.cpp (86%) rename ydb/core/tx/columnshard/engines/reader/plain_reader/{ => iterator}/plain_read_data.h (65%) create mode 100644 ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/scanner.cpp create mode 100644 ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/scanner.h rename 
ydb/core/tx/columnshard/engines/reader/plain_reader/{ => iterator}/source.cpp (59%) rename ydb/core/tx/columnshard/engines/reader/plain_reader/{ => iterator}/source.h (51%) create mode 100644 ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/ya.make delete mode 100644 ydb/core/tx/columnshard/engines/reader/plain_reader/scanner.cpp delete mode 100644 ydb/core/tx/columnshard/engines/reader/plain_reader/scanner.h delete mode 100644 ydb/core/tx/columnshard/engines/reader/read_context.cpp delete mode 100644 ydb/core/tx/columnshard/engines/reader/read_filter_merger.h delete mode 100644 ydb/core/tx/columnshard/engines/reader/read_metadata.cpp delete mode 100644 ydb/core/tx/columnshard/engines/reader/read_metadata.h create mode 100644 ydb/core/tx/columnshard/engines/reader/sys_view/abstract/granule_view.cpp create mode 100644 ydb/core/tx/columnshard/engines/reader/sys_view/abstract/granule_view.h create mode 100644 ydb/core/tx/columnshard/engines/reader/sys_view/abstract/iterator.cpp create mode 100644 ydb/core/tx/columnshard/engines/reader/sys_view/abstract/iterator.h create mode 100644 ydb/core/tx/columnshard/engines/reader/sys_view/abstract/metadata.cpp create mode 100644 ydb/core/tx/columnshard/engines/reader/sys_view/abstract/metadata.h create mode 100644 ydb/core/tx/columnshard/engines/reader/sys_view/abstract/ya.make create mode 100644 ydb/core/tx/columnshard/engines/reader/sys_view/chunks/chunks.cpp create mode 100644 ydb/core/tx/columnshard/engines/reader/sys_view/chunks/chunks.h create mode 100644 ydb/core/tx/columnshard/engines/reader/sys_view/chunks/ya.make create mode 100644 ydb/core/tx/columnshard/engines/reader/sys_view/constructor/constructor.cpp create mode 100644 ydb/core/tx/columnshard/engines/reader/sys_view/constructor/constructor.h create mode 100644 ydb/core/tx/columnshard/engines/reader/sys_view/constructor/ya.make create mode 100644 ydb/core/tx/columnshard/engines/reader/sys_view/granules/granules.cpp create mode 100644 
ydb/core/tx/columnshard/engines/reader/sys_view/granules/granules.h create mode 100644 ydb/core/tx/columnshard/engines/reader/sys_view/granules/ya.make create mode 100644 ydb/core/tx/columnshard/engines/reader/sys_view/portions/portions.cpp create mode 100644 ydb/core/tx/columnshard/engines/reader/sys_view/portions/portions.h create mode 100644 ydb/core/tx/columnshard/engines/reader/sys_view/portions/ya.make create mode 100644 ydb/core/tx/columnshard/engines/reader/sys_view/ya.make create mode 100644 ydb/core/tx/columnshard/engines/reader/transaction/tx_scan.cpp create mode 100644 ydb/core/tx/columnshard/engines/reader/transaction/tx_scan.h create mode 100644 ydb/core/tx/columnshard/engines/reader/transaction/ya.make create mode 100644 ydb/core/tx/columnshard/engines/scheme/abstract/index_info.cpp create mode 100644 ydb/core/tx/columnshard/engines/scheme/abstract/index_info.h create mode 100644 ydb/core/tx/columnshard/engines/scheme/abstract/loader.cpp create mode 100644 ydb/core/tx/columnshard/engines/scheme/abstract/loader.h create mode 100644 ydb/core/tx/columnshard/engines/scheme/abstract/saver.cpp create mode 100644 ydb/core/tx/columnshard/engines/scheme/abstract/saver.h create mode 100644 ydb/core/tx/columnshard/engines/scheme/abstract/ya.make create mode 100644 ydb/core/tx/columnshard/engines/scheme/column/info.cpp create mode 100644 ydb/core/tx/columnshard/engines/scheme/column/info.h create mode 100644 ydb/core/tx/columnshard/engines/scheme/column/ya.make create mode 100644 ydb/core/tx/columnshard/engines/scheme/statistics/abstract/common.cpp create mode 100644 ydb/core/tx/columnshard/engines/scheme/statistics/abstract/common.h create mode 100644 ydb/core/tx/columnshard/engines/scheme/statistics/abstract/constructor.cpp create mode 100644 ydb/core/tx/columnshard/engines/scheme/statistics/abstract/constructor.h create mode 100644 ydb/core/tx/columnshard/engines/scheme/statistics/abstract/operator.cpp create mode 100644 
ydb/core/tx/columnshard/engines/scheme/statistics/abstract/operator.h create mode 100644 ydb/core/tx/columnshard/engines/scheme/statistics/abstract/portion_storage.cpp create mode 100644 ydb/core/tx/columnshard/engines/scheme/statistics/abstract/portion_storage.h create mode 100644 ydb/core/tx/columnshard/engines/scheme/statistics/abstract/ya.make create mode 100644 ydb/core/tx/columnshard/engines/scheme/statistics/max/constructor.cpp create mode 100644 ydb/core/tx/columnshard/engines/scheme/statistics/max/constructor.h create mode 100644 ydb/core/tx/columnshard/engines/scheme/statistics/max/operator.cpp create mode 100644 ydb/core/tx/columnshard/engines/scheme/statistics/max/operator.h create mode 100644 ydb/core/tx/columnshard/engines/scheme/statistics/max/ya.make create mode 100644 ydb/core/tx/columnshard/engines/scheme/statistics/protos/data.proto create mode 100644 ydb/core/tx/columnshard/engines/scheme/statistics/protos/ya.make create mode 100644 ydb/core/tx/columnshard/engines/scheme/statistics/variability/constructor.cpp create mode 100644 ydb/core/tx/columnshard/engines/scheme/statistics/variability/constructor.h create mode 100644 ydb/core/tx/columnshard/engines/scheme/statistics/variability/operator.cpp create mode 100644 ydb/core/tx/columnshard/engines/scheme/statistics/variability/operator.h create mode 100644 ydb/core/tx/columnshard/engines/scheme/statistics/variability/ya.make create mode 100644 ydb/core/tx/columnshard/engines/scheme/statistics/ya.make create mode 100644 ydb/core/tx/columnshard/engines/scheme/tiering/common.cpp create mode 100644 ydb/core/tx/columnshard/engines/scheme/tiering/common.h create mode 100644 ydb/core/tx/columnshard/engines/scheme/tiering/tier_info.cpp create mode 100644 ydb/core/tx/columnshard/engines/scheme/tiering/tier_info.h create mode 100644 ydb/core/tx/columnshard/engines/scheme/tiering/ya.make create mode 100644 ydb/core/tx/columnshard/engines/scheme/versions/abstract_scheme.cpp create mode 100644 
ydb/core/tx/columnshard/engines/scheme/versions/abstract_scheme.h create mode 100644 ydb/core/tx/columnshard/engines/scheme/versions/filtered_scheme.cpp create mode 100644 ydb/core/tx/columnshard/engines/scheme/versions/filtered_scheme.h create mode 100644 ydb/core/tx/columnshard/engines/scheme/versions/snapshot_scheme.cpp create mode 100644 ydb/core/tx/columnshard/engines/scheme/versions/snapshot_scheme.h create mode 100644 ydb/core/tx/columnshard/engines/scheme/versions/versioned_index.cpp create mode 100644 ydb/core/tx/columnshard/engines/scheme/versions/versioned_index.h create mode 100644 ydb/core/tx/columnshard/engines/scheme/versions/ya.make create mode 100644 ydb/core/tx/columnshard/engines/storage/actualizer/abstract/abstract.cpp create mode 100644 ydb/core/tx/columnshard/engines/storage/actualizer/abstract/abstract.h create mode 100644 ydb/core/tx/columnshard/engines/storage/actualizer/abstract/context.cpp create mode 100644 ydb/core/tx/columnshard/engines/storage/actualizer/abstract/context.h create mode 100644 ydb/core/tx/columnshard/engines/storage/actualizer/abstract/ya.make create mode 100644 ydb/core/tx/columnshard/engines/storage/actualizer/common/address.cpp create mode 100644 ydb/core/tx/columnshard/engines/storage/actualizer/common/address.h create mode 100644 ydb/core/tx/columnshard/engines/storage/actualizer/common/ya.make create mode 100644 ydb/core/tx/columnshard/engines/storage/actualizer/counters/counters.cpp create mode 100644 ydb/core/tx/columnshard/engines/storage/actualizer/counters/counters.h create mode 100644 ydb/core/tx/columnshard/engines/storage/actualizer/counters/ya.make create mode 100644 ydb/core/tx/columnshard/engines/storage/actualizer/index/index.cpp create mode 100644 ydb/core/tx/columnshard/engines/storage/actualizer/index/index.h create mode 100644 ydb/core/tx/columnshard/engines/storage/actualizer/index/ya.make create mode 100644 ydb/core/tx/columnshard/engines/storage/actualizer/scheme/counters.cpp create mode 100644 
ydb/core/tx/columnshard/engines/storage/actualizer/scheme/counters.h create mode 100644 ydb/core/tx/columnshard/engines/storage/actualizer/scheme/scheme.cpp create mode 100644 ydb/core/tx/columnshard/engines/storage/actualizer/scheme/scheme.h create mode 100644 ydb/core/tx/columnshard/engines/storage/actualizer/scheme/ya.make create mode 100644 ydb/core/tx/columnshard/engines/storage/actualizer/tiering/counters.cpp create mode 100644 ydb/core/tx/columnshard/engines/storage/actualizer/tiering/counters.h create mode 100644 ydb/core/tx/columnshard/engines/storage/actualizer/tiering/tiering.cpp create mode 100644 ydb/core/tx/columnshard/engines/storage/actualizer/tiering/tiering.h create mode 100644 ydb/core/tx/columnshard/engines/storage/actualizer/tiering/ya.make create mode 100644 ydb/core/tx/columnshard/engines/storage/actualizer/ya.make create mode 100644 ydb/core/tx/columnshard/engines/storage/chunks/column.cpp create mode 100644 ydb/core/tx/columnshard/engines/storage/chunks/column.h create mode 100644 ydb/core/tx/columnshard/engines/storage/chunks/data.cpp create mode 100644 ydb/core/tx/columnshard/engines/storage/chunks/data.h create mode 100644 ydb/core/tx/columnshard/engines/storage/chunks/null_column.cpp create mode 100644 ydb/core/tx/columnshard/engines/storage/chunks/null_column.h create mode 100644 ydb/core/tx/columnshard/engines/storage/chunks/ya.make delete mode 100644 ydb/core/tx/columnshard/engines/storage/optimizer/intervals/blob_size.cpp delete mode 100644 ydb/core/tx/columnshard/engines/storage/optimizer/intervals/blob_size.h delete mode 100644 ydb/core/tx/columnshard/engines/storage/optimizer/intervals/counters.cpp delete mode 100644 ydb/core/tx/columnshard/engines/storage/optimizer/intervals/counters.h delete mode 100644 ydb/core/tx/columnshard/engines/storage/optimizer/intervals/optimizer.cpp delete mode 100644 ydb/core/tx/columnshard/engines/storage/optimizer/intervals/optimizer.h delete mode 100644 
ydb/core/tx/columnshard/engines/storage/optimizer/intervals/ya.make delete mode 100644 ydb/core/tx/columnshard/engines/storage/optimizer/levels/counters.cpp delete mode 100644 ydb/core/tx/columnshard/engines/storage/optimizer/levels/counters.h delete mode 100644 ydb/core/tx/columnshard/engines/storage/optimizer/levels/optimizer.cpp delete mode 100644 ydb/core/tx/columnshard/engines/storage/optimizer/levels/optimizer.h delete mode 100644 ydb/core/tx/columnshard/engines/storage/optimizer/levels/ya.make create mode 100644 ydb/core/tx/columnshard/export/actor/export_actor.cpp create mode 100644 ydb/core/tx/columnshard/export/actor/export_actor.h create mode 100644 ydb/core/tx/columnshard/export/actor/write.cpp create mode 100644 ydb/core/tx/columnshard/export/actor/write.h create mode 100644 ydb/core/tx/columnshard/export/actor/ya.make create mode 100644 ydb/core/tx/columnshard/export/common/identifier.cpp create mode 100644 ydb/core/tx/columnshard/export/common/identifier.h create mode 100644 ydb/core/tx/columnshard/export/common/ya.make create mode 100644 ydb/core/tx/columnshard/export/events/events.cpp create mode 100644 ydb/core/tx/columnshard/export/events/events.h create mode 100644 ydb/core/tx/columnshard/export/events/ya.make create mode 100644 ydb/core/tx/columnshard/export/manager/manager.cpp create mode 100644 ydb/core/tx/columnshard/export/manager/manager.h create mode 100644 ydb/core/tx/columnshard/export/manager/ya.make create mode 100644 ydb/core/tx/columnshard/export/protos/cursor.proto create mode 100644 ydb/core/tx/columnshard/export/protos/selector.proto create mode 100644 ydb/core/tx/columnshard/export/protos/storage.proto create mode 100644 ydb/core/tx/columnshard/export/protos/task.proto create mode 100644 ydb/core/tx/columnshard/export/protos/ya.make create mode 100644 ydb/core/tx/columnshard/export/session/cursor.cpp create mode 100644 ydb/core/tx/columnshard/export/session/cursor.h create mode 100644 
ydb/core/tx/columnshard/export/session/selector/abstract/selector.cpp create mode 100644 ydb/core/tx/columnshard/export/session/selector/abstract/selector.h create mode 100644 ydb/core/tx/columnshard/export/session/selector/abstract/ya.make create mode 100644 ydb/core/tx/columnshard/export/session/selector/backup/selector.cpp create mode 100644 ydb/core/tx/columnshard/export/session/selector/backup/selector.h create mode 100644 ydb/core/tx/columnshard/export/session/selector/backup/ya.make create mode 100644 ydb/core/tx/columnshard/export/session/selector/ya.make create mode 100644 ydb/core/tx/columnshard/export/session/session.cpp create mode 100644 ydb/core/tx/columnshard/export/session/session.h create mode 100644 ydb/core/tx/columnshard/export/session/storage/abstract/storage.cpp create mode 100644 ydb/core/tx/columnshard/export/session/storage/abstract/storage.h create mode 100644 ydb/core/tx/columnshard/export/session/storage/abstract/ya.make create mode 100644 ydb/core/tx/columnshard/export/session/storage/s3/storage.cpp create mode 100644 ydb/core/tx/columnshard/export/session/storage/s3/storage.h create mode 100644 ydb/core/tx/columnshard/export/session/storage/s3/ya.make create mode 100644 ydb/core/tx/columnshard/export/session/storage/tier/storage.cpp create mode 100644 ydb/core/tx/columnshard/export/session/storage/tier/storage.h create mode 100644 ydb/core/tx/columnshard/export/session/storage/tier/ya.make create mode 100644 ydb/core/tx/columnshard/export/session/storage/ya.make create mode 100644 ydb/core/tx/columnshard/export/session/task.cpp create mode 100644 ydb/core/tx/columnshard/export/session/task.h create mode 100644 ydb/core/tx/columnshard/export/session/ya.make create mode 100644 ydb/core/tx/columnshard/export/transactions/tx_save_cursor.cpp create mode 100644 ydb/core/tx/columnshard/export/transactions/tx_save_cursor.h create mode 100644 ydb/core/tx/columnshard/export/transactions/ya.make create mode 100644 
ydb/core/tx/columnshard/export/ya.make create mode 100644 ydb/core/tx/columnshard/inflight_request_tracker.cpp create mode 100644 ydb/core/tx/columnshard/normalizer/portion/clean.cpp create mode 100644 ydb/core/tx/columnshard/normalizer/portion/clean.h delete mode 100644 ydb/core/tx/columnshard/normalizer/portion/min_max.cpp delete mode 100644 ydb/core/tx/columnshard/normalizer/portion/min_max.h create mode 100644 ydb/core/tx/columnshard/normalizer/portion/portion.cpp create mode 100644 ydb/core/tx/columnshard/normalizer/portion/portion.h create mode 100644 ydb/core/tx/columnshard/normalizer/tables/normalizer.cpp create mode 100644 ydb/core/tx/columnshard/normalizer/tables/normalizer.h create mode 100644 ydb/core/tx/columnshard/normalizer/tables/ya.make create mode 100644 ydb/core/tx/columnshard/splitter/abstract/chunk_meta.cpp create mode 100644 ydb/core/tx/columnshard/splitter/abstract/chunk_meta.h rename ydb/core/tx/columnshard/{blobs_action/memory.cpp => splitter/abstract/chunks.cpp} (60%) create mode 100644 ydb/core/tx/columnshard/splitter/abstract/chunks.h create mode 100644 ydb/core/tx/columnshard/splitter/abstract/ya.make delete mode 100644 ydb/core/tx/columnshard/splitter/rb_splitter.cpp delete mode 100644 ydb/core/tx/columnshard/splitter/rb_splitter.h create mode 100644 ydb/core/tx/columnshard/splitter/similar_packer.cpp create mode 100644 ydb/core/tx/columnshard/splitter/similar_packer.h create mode 100644 ydb/core/tx/columnshard/test_helper/controllers.cpp create mode 100644 ydb/core/tx/columnshard/test_helper/controllers.h create mode 100644 ydb/core/tx/columnshard/test_helper/helper.cpp create mode 100644 ydb/core/tx/columnshard/test_helper/helper.h create mode 100644 ydb/core/tx/columnshard/test_helper/ya.make create mode 100644 ydb/core/tx/columnshard/transactions/operators/backup.cpp create mode 100644 ydb/core/tx/columnshard/transactions/operators/backup.h create mode 100644 ydb/core/tx/columnshard/transactions/propose_transaction_base.cpp create 
mode 100644 ydb/core/tx/columnshard/transactions/propose_transaction_base.h create mode 100644 ydb/core/tx/columnshard/ut_rw/ut_backup.cpp create mode 100644 ydb/core/tx/schemeshard/common/validation.cpp create mode 100644 ydb/core/tx/schemeshard/common/validation.h create mode 100644 ydb/core/tx/schemeshard/common/ya.make create mode 100644 ydb/core/tx/schemeshard/olap/options/schema.cpp create mode 100644 ydb/core/tx/schemeshard/olap/options/schema.h create mode 100644 ydb/core/tx/schemeshard/olap/options/update.cpp create mode 100644 ydb/core/tx/schemeshard/olap/options/update.h create mode 100644 ydb/core/tx/schemeshard/olap/options/ya.make create mode 100644 ydb/core/tx/schemeshard/olap/statistics/schema.cpp create mode 100644 ydb/core/tx/schemeshard/olap/statistics/schema.h create mode 100644 ydb/core/tx/schemeshard/olap/statistics/update.cpp create mode 100644 ydb/core/tx/schemeshard/olap/statistics/update.h create mode 100644 ydb/core/tx/schemeshard/olap/statistics/ya.make create mode 100644 ydb/core/tx/tiering/abstract/manager.cpp create mode 100644 ydb/core/tx/tiering/abstract/manager.h create mode 100644 ydb/core/tx/tiering/abstract/ya.make diff --git a/.github/config/muted_test.txt b/.github/config/muted_test.txt index 5d653c79c40c..571b4754e87a 100644 --- a/.github/config/muted_test.txt +++ b/.github/config/muted_test.txt @@ -3,8 +3,6 @@ ydb-core-blobstorage-ut_blobstorage/SpaceCheckForDiskReassign::* ydb-services-ydb-sdk_sessions_pool_ut/YdbSdkSessionsPool::StressTestSync10 ydb-tests-functional-kqp-kqp_query_session/KqpQuerySession::NoLocalAttach ydb-core-blobstorage-ut_blobstorage/VDiskAssimilation::Test -ydb-core-tx-columnshard-ut_schema/TColumnShardTestSchema::ForgetAfterFail -ydb-core-tx-columnshard-ut_schema/TColumnShardTestSchema::RebootForgetAfterFail ydb-library-yql-sql-pg-ut/PgSqlParsingAutoparam::AutoParamValues_DifferentTypes ydb-core-blobstorage-ut_blobstorage/[6/10]* ydb/core/blobstorage/ut_blobstorage/Defragmentation::DoesItWork diff 
--git a/.github/config/muted_ya.txt b/.github/config/muted_ya.txt index 46a896df34a4..1e7f35682e24 100644 --- a/.github/config/muted_ya.txt +++ b/.github/config/muted_ya.txt @@ -19,7 +19,13 @@ ydb/core/kafka_proxy/ut KafkaProtocol.CreatePartitionsScenario ydb/core/kafka_proxy/ut KafkaProtocol.ProduceScenario ydb/core/kqp/provider/ut KikimrIcGateway.TestLoadBasicSecretValueFromExternalDataSourceMetadata ydb/core/kqp/ut/federated_query/generic * -ydb/core/kqp/ut/olap * +ydb/core/kqp/ut/olap KqpOlapAggregations.Json_Exists +ydb/core/kqp/ut/olap KqpOlapIndexes.Indexes +ydb/core/kqp/ut/olap KqpOlapIndexes.IndexesActualization +ydb/core/kqp/ut/olap KqpOlapBlobsSharing.* +ydb/core/kqp/ut/olap KqpOlap.ScanQueryOltpAndOlap +ydb/core/kqp/ut/olap KqpOlapStatistics.StatsUsageWithTTL +ydb/core/kqp/ut/olap KqpOlap.YqlScriptOltpAndOlap ydb/core/kqp/ut/pg KqpPg.CreateIndex ydb/core/kqp/ut/query KqpLimits.QueryReplySize ydb/core/kqp/ut/query KqpQuery.QueryTimeout @@ -32,9 +38,6 @@ ydb/core/kqp/ut/service KqpQueryService.QueryOnClosedSession ydb/core/kqp/ut/service KqpQueryServiceScripts.ForgetScriptExecutionRace ydb/core/kqp/ut/service KqpService.CloseSessionsWithLoad ydb/core/kqp/ut/service [38/50]* -ydb/core/tx/columnshard/ut_schema TColumnShardTestSchema.ForgetAfterFail -ydb/core/tx/columnshard/ut_schema TColumnShardTestSchema.RebootForgetAfterFail -ydb/core/tx/columnshard/engines/ut * ydb/core/tx/coordinator/ut Coordinator.RestoreTenantConfiguration ydb/core/tx/datashard/ut_change_exchange Cdc.InitialScanDebezium ydb/core/tx/replication/ydb_proxy/ut YdbProxyTests.ReadTopic diff --git a/ydb/core/base/blobstorage.h b/ydb/core/base/blobstorage.h index e7bfa37b25b7..f4037f27ca2c 100644 --- a/ydb/core/base/blobstorage.h +++ b/ydb/core/base/blobstorage.h @@ -981,14 +981,16 @@ struct TEvBlobStorage { bool WrittenBeyondBarrier = false; // was this blob written beyond the barrier? 
mutable NLWTrace::TOrbit Orbit; std::shared_ptr ExecutionRelay; + const TString StorageId; TEvPutResult(NKikimrProto::EReplyStatus status, const TLogoBlobID &id, const TStorageStatusFlags statusFlags, - ui32 groupId, float approximateFreeSpaceShare) + ui32 groupId, float approximateFreeSpaceShare, const TString& storageId = Default()) : Status(status) , Id(id) , StatusFlags(statusFlags) , GroupId(groupId) , ApproximateFreeSpaceShare(approximateFreeSpaceShare) + , StorageId(storageId) {} TString Print(bool isFull) const { diff --git a/ydb/core/formats/arrow/arrow_batch_builder.cpp b/ydb/core/formats/arrow/arrow_batch_builder.cpp index 76c19421555d..1e49120c0c7c 100644 --- a/ydb/core/formats/arrow/arrow_batch_builder.cpp +++ b/ydb/core/formats/arrow/arrow_batch_builder.cpp @@ -1,7 +1,7 @@ #include "arrow_batch_builder.h" +#include "switch/switch_type.h" #include #include - namespace NKikimr::NArrow { namespace { @@ -195,12 +195,18 @@ TArrowBatchBuilder::TArrowBatchBuilder(arrow::Compression::type codec, const std WriteOptions.use_threads = false; } -bool TArrowBatchBuilder::Start(const std::vector>& ydbColumns) { +arrow::Status TArrowBatchBuilder::Start(const std::vector>& ydbColumns) { YdbSchema = ydbColumns; auto schema = MakeArrowSchema(ydbColumns, NotNullColumns); - auto status = arrow::RecordBatchBuilder::Make(schema, arrow::default_memory_pool(), RowsToReserve, &BatchBuilder); + if (!schema.ok()) { + return arrow::Status::FromArgs(schema.status().code(), "Cannot make arrow schema: ", schema.status().ToString()); + } + auto status = arrow::RecordBatchBuilder::Make(*schema, arrow::default_memory_pool(), RowsToReserve, &BatchBuilder); NumRows = NumBytes = 0; - return status.ok(); + if (!status.ok()) { + return arrow::Status::FromArgs(schema.status().code(), "Cannot make arrow builder: ", status.ToString()); + } + return arrow::Status::OK(); } void TArrowBatchBuilder::AppendCell(const TCell& cell, ui32 colNum) { @@ -259,7 +265,7 @@ void 
TArrowBatchBuilder::ReserveData(ui32 columnNo, size_t size) { Y_ABORT_UNLESS(columnNo < YdbSchema.size()); auto type = YdbSchema[columnNo].second; - SwitchYqlTypeToArrowType(type, [&](const auto& type) { + Y_ABORT_UNLESS(SwitchYqlTypeToArrowType(type, [&](const auto& type) { using TWrap = std::decay_t; using TBuilder = typename arrow::TypeTraits::BuilderType; @@ -270,7 +276,7 @@ void TArrowBatchBuilder::ReserveData(ui32 columnNo, size_t size) { Y_ABORT_UNLESS(status.ok()); } return true; - }); + })); } std::shared_ptr TArrowBatchBuilder::FlushBatch(bool reinitialize) { diff --git a/ydb/core/formats/arrow/arrow_batch_builder.h b/ydb/core/formats/arrow/arrow_batch_builder.h index 2d4e1f3ed6b0..cacf7fd7882f 100644 --- a/ydb/core/formats/arrow/arrow_batch_builder.h +++ b/ydb/core/formats/arrow/arrow_batch_builder.h @@ -2,6 +2,7 @@ #include "arrow_helpers.h" #include #include +#include namespace NKikimr::NArrow { @@ -155,8 +156,11 @@ class TArrowBatchBuilder : public NKikimr::IBlockBuilder { ui64 maxRowsInBlock, ui64 maxBytesInBlock, TString& err) override { Y_UNUSED(maxRowsInBlock); Y_UNUSED(maxBytesInBlock); - Y_UNUSED(err); - return Start(columns); + const auto result = Start(columns); + if (!result.ok()) { + err = result.ToString(); + } + return result.ok(); } void AddRow(const NKikimr::TDbTupleRef& key, const NKikimr::TDbTupleRef& value) override; @@ -175,7 +179,7 @@ class TArrowBatchBuilder : public NKikimr::IBlockBuilder { return NumBytes; } - bool Start(const std::vector>& columns); + arrow::Status Start(const std::vector>& columns); std::shared_ptr FlushBatch(bool reinitialize); std::shared_ptr GetBatch() const { return Batch; } diff --git a/ydb/core/formats/arrow/arrow_filter.cpp b/ydb/core/formats/arrow/arrow_filter.cpp index 162bbb37b540..58cd7116baed 100644 --- a/ydb/core/formats/arrow/arrow_filter.cpp +++ b/ydb/core/formats/arrow/arrow_filter.cpp @@ -1,5 +1,8 @@ #include "arrow_filter.h" #include "switch_type.h" +#include "common/container.h" +#include 
"common/adapter.h" + #include #include #include @@ -307,7 +310,7 @@ NKikimr::NArrow::TColumnFilter TColumnFilter::MakePredicateFilter(const arrow::D return NArrow::TColumnFilter(std::move(bits)); } -template +template bool ApplyImpl(const TColumnFilter& filter, std::shared_ptr& batch, const std::optional startPos, const std::optional count) { if (!batch || !batch->num_rows()) { return false; @@ -322,33 +325,26 @@ bool ApplyImpl(const TColumnFilter& filter, std::shared_ptr& batch, const } } if (filter.IsTotalDenyFilter()) { - batch = batch->Slice(0, 0); + batch = NAdapter::TDataBuilderPolicy::GetEmptySame(batch); return true; } if (filter.IsTotalAllowFilter()) { return true; } - auto res = arrow::compute::Filter(batch, filter.BuildArrowFilter(batch->num_rows(), startPos, count)); - Y_VERIFY_S(res.ok(), res.status().message()); - Y_ABORT_UNLESS((*res).kind() == kindExpected); - if constexpr (kindExpected == arrow::Datum::TABLE) { - batch = (*res).table(); - return batch->num_rows(); - } - if constexpr (kindExpected == arrow::Datum::RECORD_BATCH) { - batch = (*res).record_batch(); - return batch->num_rows(); - } - AFL_VERIFY(false); - return false; + batch = NAdapter::TDataBuilderPolicy::ApplyArrowFilter(batch, filter.BuildArrowFilter(batch->num_rows(), startPos, count)); + return batch->num_rows(); +} + +bool TColumnFilter::Apply(std::shared_ptr& batch, const std::optional startPos, const std::optional count) const { + return ApplyImpl(*this, batch, startPos, count); } bool TColumnFilter::Apply(std::shared_ptr& batch, const std::optional startPos, const std::optional count) const { - return ApplyImpl(*this, batch, startPos, count); + return ApplyImpl(*this, batch, startPos, count); } bool TColumnFilter::Apply(std::shared_ptr& batch, const std::optional startPos, const std::optional count) const { - return ApplyImpl(*this, batch, startPos, count); + return ApplyImpl(*this, batch, startPos, count); } void TColumnFilter::Apply(const ui32 expectedRecordsCount, 
std::vector& datums) const { diff --git a/ydb/core/formats/arrow/arrow_filter.h b/ydb/core/formats/arrow/arrow_filter.h index 80e449ef05c1..a83b37a78333 100644 --- a/ydb/core/formats/arrow/arrow_filter.h +++ b/ydb/core/formats/arrow/arrow_filter.h @@ -8,6 +8,8 @@ namespace NKikimr::NArrow { +class TGeneralContainer; + enum class ECompareType { LESS = 1, LESS_OR_EQUAL, @@ -62,6 +64,10 @@ class TColumnFilter { return Filter.capacity() * sizeof(ui32) + Count * sizeof(bool); } + static ui64 GetPredictedMemorySize(const ui32 recordsCount) { + return 2 /* capacity */ * recordsCount * (sizeof(ui32) + sizeof(bool)); + } + class TIterator { private: i64 InternalPosition = 0; @@ -172,6 +178,7 @@ class TColumnFilter { // It makes a filter using composite predicate static TColumnFilter MakePredicateFilter(const arrow::Datum& datum, const arrow::Datum& border, ECompareType compareType); + bool Apply(std::shared_ptr& batch, const std::optional startPos = {}, const std::optional count = {}) const; bool Apply(std::shared_ptr& batch, const std::optional startPos = {}, const std::optional count = {}) const; bool Apply(std::shared_ptr& batch, const std::optional startPos = {}, const std::optional count = {}) const; void Apply(const ui32 expectedRecordsCount, std::vector& datums) const; diff --git a/ydb/core/formats/arrow/arrow_helpers.cpp b/ydb/core/formats/arrow/arrow_helpers.cpp index a49cf23e686e..b71c9a342a81 100644 --- a/ydb/core/formats/arrow/arrow_helpers.cpp +++ b/ydb/core/formats/arrow/arrow_helpers.cpp @@ -1,9 +1,8 @@ #include "arrow_helpers.h" #include "switch_type.h" -#include "one_batch_input_stream.h" #include "common/validation.h" -#include "merging_sorted_input_stream.h" #include "permutations.h" +#include "common/adapter.h" #include "serializer/native.h" #include "serializer/abstract.h" #include "serializer/stream.h" @@ -48,7 +47,7 @@ std::shared_ptr CreateEmptyArrowImpl() { return arrow::duration(arrow::TimeUnit::TimeUnit::MICRO); } -std::shared_ptr 
GetArrowType(NScheme::TTypeInfo typeId) { +arrow::Result> GetArrowType(NScheme::TTypeInfo typeId) { std::shared_ptr result; bool success = SwitchYqlTypeToArrowType(typeId, [&](TTypeWrapper typeHolder) { Y_UNUSED(typeHolder); @@ -58,10 +57,11 @@ std::shared_ptr GetArrowType(NScheme::TTypeInfo typeId) { if (success) { return result; } - return std::make_shared(); + + return arrow::Status::TypeError("unsupported type ", NKikimr::NScheme::TypeName(typeId.GetTypeId())); } -std::shared_ptr GetCSVArrowType(NScheme::TTypeInfo typeId) { +arrow::Result> GetCSVArrowType(NScheme::TTypeInfo typeId) { std::shared_ptr result; switch (typeId.GetTypeId()) { case NScheme::NTypeIds::Datetime: @@ -75,18 +75,31 @@ std::shared_ptr GetCSVArrowType(NScheme::TTypeInfo typeId) { } } -std::vector> MakeArrowFields(const std::vector>& columns, const std::set& notNullColumns) { +arrow::Result MakeArrowFields(const std::vector>& columns, const std::set& notNullColumns) { std::vector> fields; fields.reserve(columns.size()); + TVector errors; for (auto& [name, ydbType] : columns) { std::string colName(name.data(), name.size()); - fields.emplace_back(std::make_shared(colName, GetArrowType(ydbType), !notNullColumns.contains(colName))); + auto arrowType = GetArrowType(ydbType); + if (arrowType.ok()) { + fields.emplace_back(std::make_shared(colName, arrowType.ValueUnsafe(), !notNullColumns.contains(colName))); + } else { + errors.emplace_back(colName + " error: " + arrowType.status().ToString()); + } + } + if (errors.empty()) { + return fields; } - return fields; + return arrow::Status::TypeError(JoinSeq(", ", errors)); } -std::shared_ptr MakeArrowSchema(const std::vector>& ydbColumns, const std::set& notNullColumns) { - return std::make_shared(MakeArrowFields(ydbColumns, notNullColumns)); +arrow::Result> MakeArrowSchema(const std::vector>& ydbColumns, const std::set& notNullColumns) { + const auto fields = MakeArrowFields(ydbColumns, notNullColumns); + if (fields.ok()) { + return 
std::make_shared(fields.ValueUnsafe()); + } + return fields.status(); } TString SerializeSchema(const arrow::Schema& schema) { @@ -136,30 +149,31 @@ std::shared_ptr MakeEmptyBatch(const std::shared_ptr - std::shared_ptr ExtractColumnsImpl(const std::shared_ptr& srcBatch, - const std::vector& columnNames) { - std::vector> fields; - fields.reserve(columnNames.size()); - std::vector> columns; - columns.reserve(columnNames.size()); - - auto srcSchema = srcBatch->schema(); - for (auto& name : columnNames) { - int pos = srcSchema->GetFieldIndex(name); - if (pos < 0) { - return {}; - } - fields.push_back(srcSchema->field(pos)); - columns.push_back(srcBatch->column(pos)); - } - return arrow::RecordBatch::Make(std::make_shared(std::move(fields)), srcBatch->num_rows(), std::move(columns)); +template +std::shared_ptr ExtractColumnsImpl(const std::shared_ptr& srcBatch, + const std::vector& columnNames) { + std::vector> fields; + fields.reserve(columnNames.size()); + std::vector::TColumn>> columns; + columns.reserve(columnNames.size()); + + auto srcSchema = srcBatch->schema(); + for (auto& name : columnNames) { + int pos = srcSchema->GetFieldIndex(name); + if (pos < 0) { + return {}; + } + fields.push_back(srcSchema->field(pos)); + columns.push_back(srcBatch->column(pos)); } + + return NAdapter::TDataBuilderPolicy::Build(std::move(fields), std::move(columns), srcBatch->num_rows()); +} } std::shared_ptr ExtractColumns(const std::shared_ptr& srcBatch, @@ -172,7 +186,19 @@ std::shared_ptr ExtractColumns(const std::shared_ptr ExtractColumnsValidate(const std::shared_ptr& srcBatch, +std::shared_ptr ExtractColumns(const std::shared_ptr& srcBatch, + const std::vector& columnNames) { + return ExtractColumnsImpl(srcBatch, columnNames); +} + +std::shared_ptr ExtractColumns(const std::shared_ptr& srcBatch, + const std::vector& columnNames) { + return ExtractColumnsImpl(srcBatch, columnNames); +} + +namespace { +template +std::shared_ptr ExtractColumnsValidateImpl(const std::shared_ptr& 
srcBatch, const std::vector& columnNames) { if (!srcBatch) { return srcBatch; @@ -182,7 +208,7 @@ std::shared_ptr ExtractColumnsValidate(const std::shared_ptr } std::vector> fields; fields.reserve(columnNames.size()); - std::vector> columns; + std::vector::TColumn>> columns; columns.reserve(columnNames.size()); auto srcSchema = srcBatch->schema(); @@ -193,7 +219,18 @@ std::shared_ptr ExtractColumnsValidate(const std::shared_ptr columns.push_back(srcBatch->column(pos)); } - return arrow::RecordBatch::Make(std::make_shared(std::move(fields)), srcBatch->num_rows(), std::move(columns)); + return NAdapter::TDataBuilderPolicy::Build(std::move(fields), std::move(columns), srcBatch->num_rows()); +} +} + +std::shared_ptr ExtractColumnsValidate(const std::shared_ptr& srcBatch, + const std::vector& columnNames) { + return ExtractColumnsValidateImpl(srcBatch, columnNames); +} + +std::shared_ptr ExtractColumnsValidate(const std::shared_ptr& srcBatch, + const std::vector& columnNames) { + return ExtractColumnsValidateImpl(srcBatch, columnNames); } std::shared_ptr ExtractColumns(const std::shared_ptr& srcBatch, @@ -259,30 +296,18 @@ std::shared_ptr ExtractExistedColumns(const std::shared_ptr< return arrow::RecordBatch::Make(std::make_shared(std::move(fields)), srcBatch->num_rows(), std::move(columns)); } -std::shared_ptr CombineInTable(const std::vector>& batches) { - auto res = arrow::Table::FromRecordBatches(batches); - if (!res.ok()) { - return nullptr; - } - - res = (*res)->CombineChunks(); - if (!res.ok()) { - return nullptr; - } - - return res.ValueOrDie(); -} - std::shared_ptr CombineBatches(const std::vector>& batches) { if (batches.empty()) { return nullptr; } - auto table = CombineInTable(batches); - return table ? ToBatch(table) : nullptr; + auto table = TStatusValidator::GetValid(arrow::Table::FromRecordBatches(batches)); + return table ? 
ToBatch(table, true) : nullptr; } std::shared_ptr ToBatch(const std::shared_ptr& tableExt, const bool combine) { - Y_ABORT_UNLESS(tableExt); + if (!tableExt) { + return nullptr; + } std::shared_ptr table; if (combine) { auto res = tableExt->CombineChunks(); @@ -294,74 +319,13 @@ std::shared_ptr ToBatch(const std::shared_ptr& std::vector> columns; columns.reserve(table->num_columns()); for (auto& col : table->columns()) { - Y_ABORT_UNLESS(col->num_chunks() == 1); + AFL_VERIFY(col->num_chunks() == 1)("size", col->num_chunks())("size_bytes", GetTableDataSize(tableExt)) + ("schema", tableExt->schema()->ToString())("size_new", GetTableDataSize(table)); columns.push_back(col->chunk(0)); } return arrow::RecordBatch::Make(table->schema(), table->num_rows(), columns); } -std::shared_ptr CombineSortedBatches(const std::vector>& batches, - const std::shared_ptr& description) { - std::vector streams; - for (auto& batch : batches) { - streams.push_back(std::make_shared(batch)); - } - - auto mergeStream = std::make_shared(streams, description, Max()); - std::shared_ptr batch = mergeStream->Read(); - Y_ABORT_UNLESS(!mergeStream->Read()); - return batch; -} - -std::vector> MergeSortedBatches(const std::vector>& batches, - const std::shared_ptr& description, - size_t maxBatchRows) { - Y_ABORT_UNLESS(maxBatchRows); - ui64 numRows = 0; - std::vector streams; - streams.reserve(batches.size()); - for (auto& batch : batches) { - if (batch->num_rows()) { - numRows += batch->num_rows(); - streams.push_back(std::make_shared(batch)); - } - } - - std::vector> out; - out.reserve(numRows / maxBatchRows + 1); - - auto mergeStream = std::make_shared(streams, description, maxBatchRows); - while (std::shared_ptr batch = mergeStream->Read()) { - Y_ABORT_UNLESS(batch->num_rows()); - out.push_back(batch); - } - return out; -} - -std::vector> SliceSortedBatches(const std::vector>& batches, - const std::shared_ptr& description, - size_t maxBatchRows) { - Y_ABORT_UNLESS(!description->Reverse); - - 
std::vector streams; - streams.reserve(batches.size()); - for (auto& batch : batches) { - if (batch->num_rows()) { - streams.push_back(std::make_shared(batch)); - } - } - - std::vector> out; - out.reserve(streams.size()); - - auto dedupStream = std::make_shared(streams, description, maxBatchRows, true); - while (std::shared_ptr batch = dedupStream->Read()) { - Y_ABORT_UNLESS(batch->num_rows()); - out.push_back(batch); - } - return out; -} - // Check if the permutation doesn't reorder anything bool IsTrivial(const arrow::UInt64Array& permutation, const ui64 originalLength) { if ((ui64)permutation.length() != originalLength) { @@ -1008,4 +972,11 @@ std::vector> SliceToRecordBatches(const std: return result; } +std::shared_ptr ToTable(const std::shared_ptr& batch) { + if (!batch) { + return nullptr; + } + return TStatusValidator::GetValid(arrow::Table::FromRecordBatches(batch->schema(), {batch})); +} + } diff --git a/ydb/core/formats/arrow/arrow_helpers.h b/ydb/core/formats/arrow/arrow_helpers.h index efa064ee71a3..f4871e126e04 100644 --- a/ydb/core/formats/arrow/arrow_helpers.h +++ b/ydb/core/formats/arrow/arrow_helpers.h @@ -13,8 +13,8 @@ namespace NKikimr::NArrow { using TArrayVec = std::vector>; -std::shared_ptr GetArrowType(NScheme::TTypeInfo typeInfo); -std::shared_ptr GetCSVArrowType(NScheme::TTypeInfo typeId); +arrow::Result> GetArrowType(NScheme::TTypeInfo typeInfo); +arrow::Result> GetCSVArrowType(NScheme::TTypeInfo typeId); template inline bool ArrayEqualValue(const std::shared_ptr& x, const std::shared_ptr& y) { @@ -42,8 +42,8 @@ inline bool ArrayEqualView(const std::shared_ptr& x, const std::sh struct TSortDescription; -std::vector> MakeArrowFields(const std::vector>& columns, const std::set& notNullColumns = {}); -std::shared_ptr MakeArrowSchema(const std::vector>& columns, const std::set& notNullColumns = {}); +arrow::Result MakeArrowFields(const std::vector>& columns, const std::set& notNullColumns = {}); +arrow::Result> MakeArrowSchema(const 
std::vector>& columns, const std::set& notNullColumns = {}); TString SerializeSchema(const arrow::Schema& schema); std::shared_ptr DeserializeSchema(const TString& str); @@ -54,13 +54,20 @@ TString SerializeBatchNoCompression(const std::shared_ptr& b std::shared_ptr DeserializeBatch(const TString& blob, const std::shared_ptr& schema); std::shared_ptr MakeEmptyBatch(const std::shared_ptr& schema, const ui32 rowsCount = 0); +std::shared_ptr ToTable(const std::shared_ptr& batch); std::shared_ptr ExtractColumns(const std::shared_ptr& srcBatch, - const std::vector& columnNames); + const std::vector& columnNames); std::shared_ptr ExtractColumns(const std::shared_ptr& srcBatch, - const std::vector& columnNames); + const std::vector& columnNames); +std::shared_ptr ExtractColumns(const std::shared_ptr& srcBatch, + const std::vector& columnNames); +std::shared_ptr ExtractColumns(const std::shared_ptr& srcBatch, + const std::vector& columnNames); +std::shared_ptr ExtractColumnsValidate(const std::shared_ptr& srcBatch, + const std::vector& columnNames); std::shared_ptr ExtractColumnsValidate(const std::shared_ptr& srcBatch, - const std::vector& columnNames); + const std::vector& columnNames); std::shared_ptr ExtractColumns(const std::shared_ptr& srcBatch, const std::shared_ptr& dstSchema, bool addNotExisted = false); @@ -72,18 +79,9 @@ inline std::shared_ptr ExtractExistedColumns(const std::shar return ExtractExistedColumns(srcBatch, dstSchema->fields()); } -std::shared_ptr CombineInTable(const std::vector>& batches); -std::shared_ptr ToBatch(const std::shared_ptr& combinedTable, const bool combine = false); +std::shared_ptr ToBatch(const std::shared_ptr& combinedTable, const bool combine); std::shared_ptr CombineBatches(const std::vector>& batches); -std::shared_ptr CombineSortedBatches(const std::vector>& batches, - const std::shared_ptr& description); std::shared_ptr MergeColumns(const std::vector>& rb); -std::vector> MergeSortedBatches(const std::vector>& batches, - const 
std::shared_ptr& description, - size_t maxBatchRows); -std::vector> SliceSortedBatches(const std::vector>& batches, - const std::shared_ptr& description, - size_t maxBatchRows = 0); std::vector> ShardingSplit(const std::shared_ptr& batch, const std::vector& sharding, ui32 numShards); @@ -103,8 +101,8 @@ bool MergeBatchColumns(const std::vector>& batches std::shared_ptr SortBatch(const std::shared_ptr& batch, const std::shared_ptr& sortingKey, const bool andUnique); bool IsSorted(const std::shared_ptr& batch, - const std::shared_ptr& sortingKey, - bool desc = false); + const std::shared_ptr& sortingKey, + bool desc = false); bool IsSortedAndUnique(const std::shared_ptr& batch, const std::shared_ptr& sortingKey, bool desc = false); diff --git a/ydb/core/formats/arrow/common/accessor.cpp b/ydb/core/formats/arrow/common/accessor.cpp new file mode 100644 index 000000000000..450c3f40cf4c --- /dev/null +++ b/ydb/core/formats/arrow/common/accessor.cpp @@ -0,0 +1,99 @@ +#include "accessor.h" +#include +#include +#include +#include +#include + +namespace NKikimr::NArrow::NAccessor { + +void IChunkedArray::TReader::AppendPositionTo(arrow::ArrayBuilder& builder, const ui64 position, ui64* recordSize) const { + auto address = GetReadChunk(position); + AFL_VERIFY(NArrow::Append(builder, *address.GetArray(), address.GetPosition(), recordSize)); +} + +std::shared_ptr IChunkedArray::TReader::CopyRecord(const ui64 recordIndex) const { + auto address = GetReadChunk(recordIndex); + return NArrow::CopyRecords(address.GetArray(), {address.GetPosition()}); +} + +std::shared_ptr IChunkedArray::TReader::Slice(const ui32 offset, const ui32 count) const { + AFL_VERIFY(offset + count <= (ui64)GetRecordsCount())("offset", offset)("count", count)("length", GetRecordsCount()); + ui32 currentOffset = offset; + ui32 countLeast = count; + std::vector> chunks; + while (countLeast) { + auto address = GetReadChunk(currentOffset); + if (address.GetPosition() + countLeast <= 
(ui64)address.GetArray()->length()) { + chunks.emplace_back(address.GetArray()->Slice(address.GetPosition(), countLeast)); + break; + } else { + const ui32 deltaCount = address.GetArray()->length() - address.GetPosition(); + chunks.emplace_back(address.GetArray()->Slice(address.GetPosition(), deltaCount)); + AFL_VERIFY(countLeast >= deltaCount); + countLeast -= deltaCount; + currentOffset += deltaCount; + } + } + return std::make_shared(chunks, ChunkedArray->DataType); +} + +TString IChunkedArray::TReader::DebugString(const ui32 position) const { + auto address = GetReadChunk(position); + return NArrow::DebugString(address.GetArray(), address.GetPosition()); +} + +std::partial_ordering IChunkedArray::TReader::CompareColumns(const std::vector& l, const ui64 lPosition, const std::vector& r, const ui64 rPosition) { + AFL_VERIFY(l.size() == r.size()); + for (ui32 i = 0; i < l.size(); ++i) { + const TAddress lAddress = l[i].GetReadChunk(lPosition); + const TAddress rAddress = r[i].GetReadChunk(rPosition); + auto cmp = lAddress.Compare(rAddress); + if (std::is_neq(cmp)) { + return cmp; + } + } + return std::partial_ordering::equivalent; +} + +IChunkedArray::TAddress IChunkedArray::TReader::GetReadChunk(const ui64 position) const { + AFL_VERIFY(position < ChunkedArray->GetRecordsCount()); + if (CurrentChunkAddress && position < CurrentChunkAddress->GetStartPosition() + CurrentChunkAddress->GetArray()->length() && CurrentChunkAddress->GetStartPosition() <= position) { + } else { + CurrentChunkAddress = ChunkedArray->DoGetChunk(CurrentChunkAddress, position); + } + return IChunkedArray::TAddress(CurrentChunkAddress->GetArray(), position - CurrentChunkAddress->GetStartPosition(), CurrentChunkAddress->GetChunkIndex()); +} + +const std::partial_ordering IChunkedArray::TAddress::Compare(const TAddress& item) const { + return TComparator::TypedCompare(*Array, Position, *item.Array, item.Position); +} + +namespace { +class TChunkAccessor { +private: + std::shared_ptr 
ChunkedArray; +public: + TChunkAccessor(const std::shared_ptr& chunkedArray) + : ChunkedArray(chunkedArray) + { + + } + ui64 GetChunksCount() const { + return (ui64)ChunkedArray->num_chunks(); + } + ui64 GetChunkLength(const ui32 idx) const { + return (ui64)ChunkedArray->chunk(idx)->length(); + } + std::shared_ptr GetArray(const ui32 idx) const { + return ChunkedArray->chunk(idx); + } +}; +} + +IChunkedArray::TCurrentChunkAddress TTrivialChunkedArray::DoGetChunk(const std::optional& chunkCurrent, const ui64 position) const { + TChunkAccessor accessor(Array); + return SelectChunk(chunkCurrent, position, accessor); +} + +} diff --git a/ydb/core/formats/arrow/common/accessor.h b/ydb/core/formats/arrow/common/accessor.h new file mode 100644 index 000000000000..acd7e3a650c2 --- /dev/null +++ b/ydb/core/formats/arrow/common/accessor.h @@ -0,0 +1,199 @@ +#pragma once +#include +#include + +#include +#include +#include + +namespace NKikimr::NArrow::NAccessor { + +class IChunkedArray { +public: + enum class EType { + Undefined, + Array, + ChunkedArray, + SerializedChunkedArray + }; + + class TCurrentChunkAddress { + private: + YDB_READONLY_DEF(std::shared_ptr, Array); + YDB_READONLY(ui64, StartPosition, 0); + YDB_READONLY(ui64, ChunkIndex, 0); + public: + ui64 GetLength() const { + return Array->length(); + } + + TCurrentChunkAddress(const std::shared_ptr& arr, const ui64 pos, const ui32 chunkIdx) + : Array(arr) + , StartPosition(pos) + , ChunkIndex(chunkIdx) + { + AFL_VERIFY(arr); + AFL_VERIFY(arr->length()); + } + + TString DebugString() const { + return TStringBuilder() + << "start=" << StartPosition << ";" + << "chunk_index=" << ChunkIndex << ";" + << "length=" << Array->length() << ";"; + } + }; + + class TAddress { + private: + YDB_READONLY_DEF(std::shared_ptr, Array); + YDB_READONLY(ui64, Position, 0); + YDB_READONLY(ui64, ChunkIdx, 0); + public: + bool NextPosition() { + if (Position + 1 < (ui32)Array->length()) { + ++Position; + return true; + } + return false; + } 
+ + TAddress(const std::shared_ptr& arr, const ui64 position, const ui64 chunkIdx) + : Array(arr) + , Position(position) + , ChunkIdx(chunkIdx) + { + + } + + const std::partial_ordering Compare(const TAddress& item) const; + }; +private: + YDB_READONLY_DEF(std::shared_ptr, DataType); + YDB_READONLY(ui64, RecordsCount, 0); + YDB_READONLY(EType, Type, EType::Undefined); +protected: + virtual std::shared_ptr DoGetChunkedArray() const = 0; + virtual TCurrentChunkAddress DoGetChunk(const std::optional& chunkCurrent, const ui64 position) const = 0; + + template + TCurrentChunkAddress SelectChunk(const std::optional& chunkCurrent, const ui64 position, const TChunkAccessor& accessor) const { + if (!chunkCurrent || position >= chunkCurrent->GetStartPosition() + chunkCurrent->GetLength()) { + ui32 startIndex = 0; + ui64 idx = 0; + if (chunkCurrent) { + AFL_VERIFY(chunkCurrent->GetChunkIndex() + 1 < accessor.GetChunksCount()); + startIndex = chunkCurrent->GetChunkIndex() + 1; + idx = chunkCurrent->GetStartPosition() + chunkCurrent->GetLength(); + } + for (ui32 i = startIndex; i < accessor.GetChunksCount(); ++i) { + const ui64 nextIdx = idx + accessor.GetChunkLength(i); + if (idx <= position && position < nextIdx) { + return TCurrentChunkAddress(accessor.GetArray(i), idx, i); + } + idx = nextIdx; + } + } else if (position < chunkCurrent->GetStartPosition()) { + AFL_VERIFY(chunkCurrent->GetChunkIndex() > 0); + ui64 idx = chunkCurrent->GetStartPosition(); + for (i32 i = chunkCurrent->GetChunkIndex() - 1; i >= 0; --i) { + AFL_VERIFY(idx >= accessor.GetChunkLength(i))("idx", idx)("length", accessor.GetChunkLength(i)); + const ui64 nextIdx = idx - accessor.GetChunkLength(i); + if (nextIdx <= position && position < idx) { + return TCurrentChunkAddress(accessor.GetArray(i), nextIdx, i); + } + idx = nextIdx; + } + } + TStringBuilder sb; + ui64 recordsCountChunks = 0; + for (ui32 i = 0; i < accessor.GetChunksCount(); ++i) { + sb << accessor.GetChunkLength(i) << ","; + 
recordsCountChunks += accessor.GetChunkLength(i); + } + TStringBuilder chunkCurrentInfo; + if (chunkCurrent) { + chunkCurrentInfo << chunkCurrent->DebugString(); + } + AFL_VERIFY(recordsCountChunks == GetRecordsCount())("pos", position)("count", GetRecordsCount())("chunks_map", sb)("chunk_current", chunkCurrentInfo); + AFL_VERIFY(false)("pos", position)("count", GetRecordsCount())("chunks_map", sb)("chunk_current", chunkCurrentInfo); + return TCurrentChunkAddress(nullptr, 0, 0); + } + +public: + + class TReader { + private: + std::shared_ptr ChunkedArray; + mutable std::optional CurrentChunkAddress; + public: + TReader(const std::shared_ptr& data) + : ChunkedArray(data) + { + AFL_VERIFY(ChunkedArray); + } + + ui64 GetRecordsCount() const { + return ChunkedArray->GetRecordsCount(); + } + + TAddress GetReadChunk(const ui64 position) const; + static std::partial_ordering CompareColumns(const std::vector& l, const ui64 lPosition, const std::vector& r, const ui64 rPosition); + void AppendPositionTo(arrow::ArrayBuilder& builder, const ui64 position, ui64* recordSize) const; + std::shared_ptr CopyRecord(const ui64 recordIndex) const; + std::shared_ptr Slice(const ui32 offset, const ui32 count) const; + TString DebugString(const ui32 position) const; + }; + + std::shared_ptr GetChunkedArray() const { + return DoGetChunkedArray(); + } + virtual ~IChunkedArray() = default; + + IChunkedArray(const ui64 recordsCount, const EType type, const std::shared_ptr& dataType) + : DataType(dataType) + , RecordsCount(recordsCount) + , Type(type) { + + } +}; + +class TTrivialArray: public IChunkedArray { +private: + using TBase = IChunkedArray; + const std::shared_ptr Array; +protected: + virtual TCurrentChunkAddress DoGetChunk(const std::optional& /*chunkCurrent*/, const ui64 /*position*/) const override { + return TCurrentChunkAddress(Array, 0, 0); + } + virtual std::shared_ptr DoGetChunkedArray() const override { + return std::make_shared(Array); + } + +public: + TTrivialArray(const 
std::shared_ptr& data) + : TBase(data->length(), EType::Array, data->type()) + , Array(data) { + + } +}; + +class TTrivialChunkedArray: public IChunkedArray { +private: + using TBase = IChunkedArray; + const std::shared_ptr Array; +protected: + virtual TCurrentChunkAddress DoGetChunk(const std::optional& chunkCurrent, const ui64 position) const override; + virtual std::shared_ptr DoGetChunkedArray() const override { + return Array; + } + +public: + TTrivialChunkedArray(const std::shared_ptr& data) + : TBase(data->length(), EType::ChunkedArray, data->type()) + , Array(data) { + + } +}; + +} diff --git a/ydb/core/formats/arrow/common/adapter.cpp b/ydb/core/formats/arrow/common/adapter.cpp new file mode 100644 index 000000000000..ed02117a2e13 --- /dev/null +++ b/ydb/core/formats/arrow/common/adapter.cpp @@ -0,0 +1,5 @@ +#include "adapter.h" + +namespace NKikimr::NArrow::NAdapter { + +} diff --git a/ydb/core/formats/arrow/common/adapter.h b/ydb/core/formats/arrow/common/adapter.h new file mode 100644 index 000000000000..f3552019a20d --- /dev/null +++ b/ydb/core/formats/arrow/common/adapter.h @@ -0,0 +1,96 @@ +#pragma once +#include "container.h" +#include "accessor.h" +#include "validation.h" + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace NKikimr::NArrow::NAdapter { + +template +class TDataBuilderPolicy { +public: +}; + +template <> +class TDataBuilderPolicy { +public: + using TColumn = arrow::Array; + using TAccessor = NAccessor::TTrivialArray; + + [[nodiscard]] static std::shared_ptr AddColumn(const std::shared_ptr& batch, const std::shared_ptr& field, const std::shared_ptr& extCol) { + return TStatusValidator::GetValid(batch->AddColumn(batch->num_columns(), field, extCol)); + } + + [[nodiscard]] static std::shared_ptr Build(std::vector>&& fields, std::vector>&& columns, const ui32 count) { + return arrow::RecordBatch::Make(std::make_shared(std::move(fields)), count, std::move(columns)); + } + [[nodiscard]] 
static std::shared_ptr ApplyArrowFilter(const std::shared_ptr& batch, const std::shared_ptr& filter) { + auto res = arrow::compute::Filter(batch, filter); + Y_VERIFY_S(res.ok(), res.status().message()); + Y_ABORT_UNLESS(res->kind() == arrow::Datum::RECORD_BATCH); + return res->record_batch(); + } + [[nodiscard]] static std::shared_ptr GetEmptySame(const std::shared_ptr& batch) { + return batch->Slice(0, 0); + } + +}; + +template <> +class TDataBuilderPolicy { +public: + using TColumn = arrow::ChunkedArray; + using TAccessor = NAccessor::TTrivialChunkedArray; + [[nodiscard]] static std::shared_ptr Build(std::vector>&& fields, std::vector>&& columns, const ui32 count) { + return arrow::Table::Make(std::make_shared(std::move(fields)), std::move(columns), count); + } + [[nodiscard]] static std::shared_ptr AddColumn(const std::shared_ptr& batch, const std::shared_ptr& field, const std::shared_ptr& extCol) { + return TStatusValidator::GetValid(batch->AddColumn(batch->num_columns(), field, std::make_shared(extCol))); + } + + [[nodiscard]] static std::shared_ptr ApplyArrowFilter(const std::shared_ptr& batch, const std::shared_ptr& filter) { + auto res = arrow::compute::Filter(batch, filter); + Y_VERIFY_S(res.ok(), res.status().message()); + Y_ABORT_UNLESS(res->kind() == arrow::Datum::TABLE); + return res->table(); + } + [[nodiscard]] static std::shared_ptr GetEmptySame(const std::shared_ptr& batch) { + return batch->Slice(0, 0); + } +}; + +template <> +class TDataBuilderPolicy { +public: + using TColumn = NAccessor::IChunkedArray; + [[nodiscard]] static std::shared_ptr Build(std::vector>&& fields, std::vector>&& columns, const ui32 count) { + Y_ABORT_UNLESS(columns.size()); + for (auto&& i : columns) { + Y_ABORT_UNLESS(i->GetRecordsCount() == count); + } + return std::make_shared(std::make_shared(std::move(fields)), std::move(columns)); + } + [[nodiscard]] static std::shared_ptr AddColumn(const std::shared_ptr& batch, const std::shared_ptr& field, const std::shared_ptr& 
extCol) { + batch->AddField(field, std::make_shared(extCol)).Validate(); + return batch; + } + [[nodiscard]] static std::shared_ptr ApplyArrowFilter(const std::shared_ptr& batch, const std::shared_ptr& filter) { + auto table = batch->BuildTable(); + return std::make_shared(TDataBuilderPolicy::ApplyArrowFilter(table, filter)); + } + [[nodiscard]] static std::shared_ptr GetEmptySame(const std::shared_ptr& batch) { + return batch->BuildEmptySame(); + } +}; + +} diff --git a/ydb/core/formats/arrow/common/container.cpp b/ydb/core/formats/arrow/common/container.cpp new file mode 100644 index 000000000000..58ce2dfb4447 --- /dev/null +++ b/ydb/core/formats/arrow/common/container.cpp @@ -0,0 +1,116 @@ +#include "container.h" +#include +#include + +namespace NKikimr::NArrow { + +NKikimr::TConclusionStatus TGeneralContainer::MergeColumnsStrictly(const TGeneralContainer& container) { + if (RecordsCount != container.RecordsCount) { + return TConclusionStatus::Fail(TStringBuilder() << "inconsistency records count in additional container: " << + container.GetSchema()->ToString() << ". expected: " << RecordsCount << ", reality: " << container.GetRecordsCount()); + } + for (i32 i = 0; i < container.Schema->num_fields(); ++i) { + auto addFieldResult = AddField(container.Schema->field(i), container.Columns[i]); + if (!addFieldResult) { + return addFieldResult; + } + } + return TConclusionStatus::Success(); +} + +NKikimr::TConclusionStatus TGeneralContainer::AddField(const std::shared_ptr& f, const std::shared_ptr& data) { + AFL_VERIFY(f); + AFL_VERIFY(data); + if (data->GetRecordsCount() != RecordsCount) { + return TConclusionStatus::Fail(TStringBuilder() << "inconsistency records count in new column: " << + f->name() << ". 
expected: " << RecordsCount << ", reality: " << data->GetRecordsCount()); + } + if (!data->GetDataType()->Equals(f->type())) { + return TConclusionStatus::Fail("schema and data type are not equals: " + data->GetDataType()->ToString() + " vs " + f->type()->ToString()); + } + if (Schema->GetFieldByName(f->name())) { + return TConclusionStatus::Fail("field name duplication: " + f->name()); + } + auto resultAdd = Schema->AddField(Schema->num_fields(), f); + if (!resultAdd.ok()) { + return TConclusionStatus::Fail("internal schema error on add field: " + resultAdd.status().ToString()); + } + Schema = *resultAdd; + Columns.emplace_back(data); + return TConclusionStatus::Success(); +} + +TGeneralContainer::TGeneralContainer(const std::shared_ptr& schema, std::vector>&& columns) + : Schema(schema) + , Columns(std::move(columns)) +{ + AFL_VERIFY(schema); + std::optional recordsCount; + AFL_VERIFY(Schema->num_fields() == (i32)Columns.size())("schema", Schema->num_fields())("columns", Columns.size()); + for (i32 i = 0; i < Schema->num_fields(); ++i) { + AFL_VERIFY(Columns[i]); + AFL_VERIFY(Schema->field(i)->type()->Equals(Columns[i]->GetDataType())); + if (!recordsCount) { + recordsCount = Columns[i]->GetRecordsCount(); + } else { + AFL_VERIFY(*recordsCount == Columns[i]->GetRecordsCount()) + ("event", "inconsistency_records_count")("expect", *recordsCount)("real", Columns[i]->GetRecordsCount())("field_name", Schema->field(i)->name()); + } + } + AFL_VERIFY(recordsCount); + RecordsCount = *recordsCount; +} + +TGeneralContainer::TGeneralContainer(const std::shared_ptr& table) { + AFL_VERIFY(table); + Schema = table->schema(); + RecordsCount = table->num_rows(); + for (auto&& i : table->columns()) { + if (i->num_chunks() == 1) { + Columns.emplace_back(std::make_shared(i->chunk(0))); + } else { + Columns.emplace_back(std::make_shared(i)); + } + } +} + +TGeneralContainer::TGeneralContainer(const std::shared_ptr& table) { + AFL_VERIFY(table); + Schema = table->schema(); + 
RecordsCount = table->num_rows(); + for (auto&& i : table->columns()) { + Columns.emplace_back(std::make_shared(i)); + } +} + +std::shared_ptr TGeneralContainer::GetAccessorByNameVerified(const std::string& fieldId) const { + auto result = GetAccessorByNameOptional(fieldId); + AFL_VERIFY(result)("event", "cannot_find_accessor_in_general_container")("field_id", fieldId)("schema", Schema->ToString()); + return result; +} + +std::shared_ptr TGeneralContainer::BuildEmptySame() const { + std::vector> columns; + for (auto&& c : Columns) { + columns.emplace_back(std::make_shared(NArrow::TThreadSimpleArraysCache::GetNull(c->GetDataType(), 0))); + } + return std::make_shared(Schema, std::move(columns)); +} + +std::shared_ptr TGeneralContainer::BuildTable(const std::optional>& columnNames /*= {}*/) const { + std::vector> columns; + std::vector> fields; + ui32 count = 0; + for (i32 i = 0; i < Schema->num_fields(); ++i) { + if (columnNames && !columnNames->contains(Schema->field(i)->name())) { + continue; + } + ++count; + columns.emplace_back(Columns[i]->GetChunkedArray()); + fields.emplace_back(Schema->field(i)); + } + AFL_VERIFY(!columnNames || count == columnNames->size()); + return arrow::Table::Make(std::make_shared(fields), columns, RecordsCount); +} + +} diff --git a/ydb/core/formats/arrow/common/container.h b/ydb/core/formats/arrow/common/container.h new file mode 100644 index 000000000000..638ce85fe1ec --- /dev/null +++ b/ydb/core/formats/arrow/common/container.h @@ -0,0 +1,61 @@ +#pragma once +#include "accessor.h" + +#include +#include + +#include +#include + +#include +#include + +namespace NKikimr::NArrow { + +class TGeneralContainer { +private: + YDB_READONLY(ui64, RecordsCount, 0); + YDB_READONLY_DEF(std::shared_ptr, Schema); + std::vector> Columns; +public: + TString DebugString() const { + return TStringBuilder() + << "records_count=" << RecordsCount << ";" + << "schema=" << Schema->ToString() << ";" + ; + } + + ui64 num_rows() const { + return RecordsCount; + 
} + + std::shared_ptr BuildTable(const std::optional>& columnNames = {}) const; + + std::shared_ptr BuildEmptySame() const; + + [[nodiscard]] TConclusionStatus MergeColumnsStrictly(const TGeneralContainer& container); + [[nodiscard]] TConclusionStatus AddField(const std::shared_ptr& f, const std::shared_ptr& data); + + TGeneralContainer(const std::shared_ptr& table); + + TGeneralContainer(const std::shared_ptr& table); + + TGeneralContainer(const std::shared_ptr& schema, std::vector>&& columns); + + arrow::Status ValidateFull() const { + return arrow::Status::OK(); + } + + std::shared_ptr GetAccessorByNameOptional(const std::string& fieldId) const { + for (i32 i = 0; i < Schema->num_fields(); ++i) { + if (Schema->field(i)->name() == fieldId) { + return Columns[i]; + } + } + return nullptr; + } + + std::shared_ptr GetAccessorByNameVerified(const std::string& fieldId) const; +}; + +} diff --git a/ydb/core/formats/arrow/common/ya.make b/ydb/core/formats/arrow/common/ya.make index e060fae10d8e..61f742b09b76 100644 --- a/ydb/core/formats/arrow/common/ya.make +++ b/ydb/core/formats/arrow/common/ya.make @@ -2,11 +2,16 @@ LIBRARY() PEERDIR( contrib/libs/apache/arrow + ydb/core/formats/arrow/switch ydb/library/actors/core + ydb/library/conclusion ) SRCS( + container.cpp validation.cpp + adapter.cpp + accessor.cpp ) END() diff --git a/ydb/core/formats/arrow/converter.cpp b/ydb/core/formats/arrow/converter.cpp index 02610aba6335..8d4be308e3e6 100644 --- a/ydb/core/formats/arrow/converter.cpp +++ b/ydb/core/formats/arrow/converter.cpp @@ -45,20 +45,20 @@ static bool ConvertData(TCell& cell, const NScheme::TTypeInfo& colType, TMemoryP return true; } -static bool ConvertColumn(const NScheme::TTypeInfo colType, std::shared_ptr& column, std::shared_ptr& field) { +static arrow::Status ConvertColumn(const NScheme::TTypeInfo colType, std::shared_ptr& column, std::shared_ptr& field) { switch (colType.GetTypeId()) { case NScheme::NTypeIds::Decimal: - return false; + return 
arrow::Status::TypeError("Cannot convert Decimal type"); case NScheme::NTypeIds::JsonDocument: { const static TSet jsonDocArrowTypes{ arrow::Type::BINARY, arrow::Type::STRING }; if (!jsonDocArrowTypes.contains(column->type()->id())) { - return false; + return arrow::Status::TypeError("Cannot convert JsonDocument to ", column->type()->ToString()); } break; } default: if (column->type()->id() != arrow::Type::BINARY) { - return false; + return arrow::Status::TypeError("Cannot convert ", NScheme::TypeName(colType), " to ", column->type()->ToString()); } break; } @@ -73,8 +73,12 @@ static bool ConvertColumn(const NScheme::TTypeInfo colType, std::shared_ptrData(), binaryJson->Size()).ok()) { - return false; + if (!binaryJson.Defined()) { + return arrow::Status::SerializationError("Cannot serialize json: ", valueBuf); + } + auto appendResult = builder.Append(binaryJson->Data(), binaryJson->Size()); + if (!appendResult.ok()) { + return appendResult; } } } @@ -105,8 +114,9 @@ static bool ConvertColumn(const NScheme::TTypeInfo colType, std::shared_ptr result; - if (!builder.Finish(&result).ok()) { - return false; + auto finishResult = builder.Finish(&result); + if (!finishResult.ok()) { + return finishResult; } column = result; @@ -114,10 +124,10 @@ static bool ConvertColumn(const NScheme::TTypeInfo colType, std::shared_ptr(field->name(), std::make_shared()); } - return true; + return arrow::Status::OK(); } -std::shared_ptr ConvertColumns(const std::shared_ptr& batch, +arrow::Result> ConvertColumns(const std::shared_ptr& batch, const THashMap& columnsToConvert) { std::vector> columns = batch->columns(); @@ -127,8 +137,9 @@ std::shared_ptr ConvertColumns(const std::shared_ptrcolumn_name(i); auto it = columnsToConvert.find(TString(colName.data(), colName.size())); if (it != columnsToConvert.end()) { - if (!ConvertColumn(it->second, columns[i], fields[i])) { - return {}; + auto convertResult = ConvertColumn(it->second, columns[i], fields[i]); + if (!convertResult.ok()) { + 
return arrow::Status::FromArgs(convertResult.code(), "column ", colName, ": ", convertResult.ToString()); } } } @@ -173,7 +184,7 @@ static std::shared_ptr InplaceConvertColumn(const std::shared_ptr< } } -std::shared_ptr InplaceConvertColumns(const std::shared_ptr& batch, +arrow::Result> InplaceConvertColumns(const std::shared_ptr& batch, const THashMap& columnsToConvert) { std::vector> columns = batch->columns(); std::vector> fields; diff --git a/ydb/core/formats/arrow/converter.h b/ydb/core/formats/arrow/converter.h index 47906fb48150..b998a724bd01 100644 --- a/ydb/core/formats/arrow/converter.h +++ b/ydb/core/formats/arrow/converter.h @@ -71,9 +71,9 @@ class TArrowToYdbConverter { bool Process(const arrow::RecordBatch& batch, TString& errorMessage); }; -std::shared_ptr ConvertColumns(const std::shared_ptr& batch, - const THashMap& columnsToConvert); -std::shared_ptr InplaceConvertColumns(const std::shared_ptr& batch, - const THashMap& columnsToConvert); +arrow::Result> ConvertColumns(const std::shared_ptr& batch, + const THashMap& columnsToConvert); +arrow::Result> InplaceConvertColumns(const std::shared_ptr& batch, + const THashMap& columnsToConvert); } // namespace NKikimr::NArrow diff --git a/ydb/core/formats/arrow/dictionary/object.h b/ydb/core/formats/arrow/dictionary/object.h index ba8b39a4f683..2fd4d6a12924 100644 --- a/ydb/core/formats/arrow/dictionary/object.h +++ b/ydb/core/formats/arrow/dictionary/object.h @@ -13,7 +13,9 @@ class TEncodingSettings { TEncodingSettings() = default; friend class TEncodingDiff; public: - + bool IsEqualTo(const TEncodingSettings& item) const { + return Enabled == item.Enabled; + } NTransformation::ITransformer::TPtr BuildEncoder() const; NTransformation::ITransformer::TPtr BuildDecoder() const; @@ -21,7 +23,7 @@ class TEncodingSettings { static TConclusion BuildFromProto(const NKikimrSchemeOp::TDictionaryEncodingSettings& proto) { TEncodingSettings result; auto resultParse = result.DeserializeFromProto(proto); - if 
(!resultParse) { + if (resultParse.IsFail()) { return resultParse; } return result; diff --git a/ydb/core/formats/arrow/hash/calcer.cpp b/ydb/core/formats/arrow/hash/calcer.cpp index b9833216dc6d..71af0492cfea 100644 --- a/ydb/core/formats/arrow/hash/calcer.cpp +++ b/ydb/core/formats/arrow/hash/calcer.cpp @@ -83,53 +83,6 @@ TXX64::TXX64(const std::vector& columnNames, const ENoColumnPolicy Y_ABORT_UNLESS(ColumnNames.size() >= 1); } -std::shared_ptr TXX64::ExecuteToArray(const std::shared_ptr& batch, const std::string& hashFieldName) const { - std::vector> columns = GetColumns(batch); - if (columns.empty()) { - return nullptr; - } - - auto builder = NArrow::MakeBuilder(std::make_shared(hashFieldName, arrow::TypeTraits::type_singleton())); - auto& intBuilder = static_cast(*builder); - TStatusValidator::Validate(intBuilder.Reserve(batch->num_rows())); - { - NXX64::TStreamStringHashCalcer hashCalcer(Seed); - for (int row = 0; row < batch->num_rows(); ++row) { - hashCalcer.Start(); - for (auto& column : columns) { - AppendField(column, row, hashCalcer); - } - intBuilder.UnsafeAppend(hashCalcer.Finish()); - } - } - return NArrow::TStatusValidator::GetValid(builder->Finish()); -} - -std::vector> TXX64::GetColumns(const std::shared_ptr& batch) const { - std::vector> columns; - columns.reserve(ColumnNames.size()); - for (auto& colName : ColumnNames) { - auto array = batch->GetColumnByName(colName); - if (!array) { - switch (NoColumnPolicy) { - case ENoColumnPolicy::Ignore: - break; - case ENoColumnPolicy::Verify: - AFL_VERIFY(false)("reason", "no_column")("column_name", colName); - case ENoColumnPolicy::ReturnEmpty: - return {}; - } - } else { - columns.emplace_back(array); - } - } - if (columns.empty()) { - AFL_WARN(NKikimrServices::ARROW_HELPER)("event", "cannot_read_all_columns")("reason", "fields_not_found") - ("field_names", JoinSeq(",", ColumnNames))("batch_fields", JoinSeq(",", batch->schema()->field_names())); - } - return columns; -} - ui64 TXX64::CalcHash(const 
std::shared_ptr& scalar) { NXX64::TStreamStringHashCalcer calcer(0); calcer.Start(); diff --git a/ydb/core/formats/arrow/hash/calcer.h b/ydb/core/formats/arrow/hash/calcer.h index d7f49e2ef1ee..465549e22c1a 100644 --- a/ydb/core/formats/arrow/hash/calcer.h +++ b/ydb/core/formats/arrow/hash/calcer.h @@ -1,13 +1,21 @@ #pragma once -#include -#include +#include "xx_hash.h" +#include +#include +#include + +#include +#include #include #include +#include +#include +#include + #include #include -#include "xx_hash.h" namespace NKikimr::NArrow::NHash { @@ -23,7 +31,31 @@ class TXX64 { const std::vector ColumnNames; const ENoColumnPolicy NoColumnPolicy; - std::vector> GetColumns(const std::shared_ptr& batch) const; + template + std::vector::TColumn>> GetColumns(const std::shared_ptr& batch) const { + std::vector::TColumn>> columns; + columns.reserve(ColumnNames.size()); + for (auto& colName : ColumnNames) { + auto array = batch->GetColumnByName(colName); + if (!array) { + switch (NoColumnPolicy) { + case ENoColumnPolicy::Ignore: + break; + case ENoColumnPolicy::Verify: + AFL_VERIFY(false)("reason", "no_column")("column_name", colName); + case ENoColumnPolicy::ReturnEmpty: + return {}; + } + } else { + columns.emplace_back(array); + } + } + if (columns.empty()) { + AFL_WARN(NKikimrServices::ARROW_HELPER)("event", "cannot_read_all_columns")("reason", "fields_not_found") + ("field_names", JoinSeq(",", ColumnNames))("batch_fields", JoinSeq(",", batch->schema()->field_names())); + } + return columns; + } public: TXX64(const std::vector& columnNames, const ENoColumnPolicy noColumnPolicy, const ui64 seed = 0); @@ -33,7 +65,37 @@ class TXX64 { static void AppendField(const std::shared_ptr& scalar, NXX64::TStreamStringHashCalcer& hashCalcer); static ui64 CalcHash(const std::shared_ptr& scalar); std::optional> Execute(const std::shared_ptr& batch) const; - std::shared_ptr ExecuteToArray(const std::shared_ptr& batch, const std::string& hashFieldName) const; + + template + 
std::shared_ptr ExecuteToArray(const std::shared_ptr& batch, const std::string& hashFieldName) const { + std::vector::TColumn>> columns = GetColumns(batch); + if (columns.empty()) { + return nullptr; + } + + std::vector columnScanners; + for (auto&& i : columns) { + columnScanners.emplace_back(NAccessor::IChunkedArray::TReader(std::make_shared::TAccessor>(i))); + } + + + auto builder = NArrow::MakeBuilder(std::make_shared(hashFieldName, arrow::TypeTraits::type_singleton())); + auto& intBuilder = static_cast(*builder); + TStatusValidator::Validate(intBuilder.Reserve(batch->num_rows())); + { + NXX64::TStreamStringHashCalcer hashCalcer(Seed); + for (int row = 0; row < batch->num_rows(); ++row) { + hashCalcer.Start(); + for (auto& column : columnScanners) { + auto address = column.GetReadChunk(row); + AppendField(address.GetArray(), address.GetPosition(), hashCalcer); + } + intBuilder.UnsafeAppend(hashCalcer.Finish()); + } + } + return NArrow::TStatusValidator::GetValid(builder->Finish()); + } + }; } diff --git a/ydb/core/formats/arrow/hash/ya.make b/ydb/core/formats/arrow/hash/ya.make index 031034a4d2e6..6d9a98b836a6 100644 --- a/ydb/core/formats/arrow/hash/ya.make +++ b/ydb/core/formats/arrow/hash/ya.make @@ -4,6 +4,7 @@ PEERDIR( contrib/libs/apache/arrow ydb/core/formats/arrow/simple_builder ydb/core/formats/arrow/switch + ydb/core/formats/arrow/reader ydb/library/actors/core ydb/library/services ydb/library/actors/protos diff --git a/ydb/core/formats/arrow/merging_sorted_input_stream.cpp b/ydb/core/formats/arrow/merging_sorted_input_stream.cpp deleted file mode 100644 index 7cc56d695a05..000000000000 --- a/ydb/core/formats/arrow/merging_sorted_input_stream.cpp +++ /dev/null @@ -1,303 +0,0 @@ -// The code in this file is based on original ClickHouse source code -// which is licensed under Apache license v2.0 -// See: https://github.com/ClickHouse/ClickHouse/ - -#include -#include "merging_sorted_input_stream.h" -#include "switch_type.h" -#include "size_calcer.h" - 
-namespace NKikimr::NArrow { - -class TRowsBuffer : public IRowsBuffer { -public: - using TBuilders = std::vector>; - - static constexpr const size_t BUFFER_SIZE = 256; - - TRowsBuffer(TBuilders& columns, size_t maxRows) - : Columns(columns) - , MaxRows(maxRows) - { - Rows.reserve(BUFFER_SIZE); - } - - bool AddRow(const TSortCursor& cursor) override { - Rows.emplace_back(cursor->all_columns, cursor->getRow()); - if (Rows.size() >= BUFFER_SIZE) { - Flush(); - } - ++AddedRows; - return true; - } - - void Flush() override { - if (Rows.empty()) { - return; - } - for (size_t i = 0; i < Columns.size(); ++i) { - arrow::ArrayBuilder& builder = *Columns[i]; - for (auto& [srcColumn, rowPosition] : Rows) { - Append(builder, *srcColumn->at(i), rowPosition); - } - } - Rows.clear(); - } - - bool Limit() const override { - return MaxRows && (AddedRows >= MaxRows); - } - - bool HasLimit() const override { - return MaxRows; - } - -private: - TBuilders& Columns; - std::vector> Rows; - size_t MaxRows = 0; - size_t AddedRows = 0; -}; - -class TSlicedRowsBuffer : public IRowsBuffer { -public: - TSlicedRowsBuffer(size_t maxRows) - : MaxRows(maxRows) - {} - - bool AddRow(const TSortCursor& cursor) override { - if (!Batch) { - Batch = cursor->current_batch; - Offset = cursor->getRow(); - } - if (Batch.get() != cursor->current_batch.get()) { - // append from another batch - return false; - } else if (cursor->getRow() != (Offset + AddedRows)) { - // append from the same batch with data hole - return false; - } - ++AddedRows; - return true; - } - - void Flush() override { - } - - bool Limit() const override { - return MaxRows && (AddedRows >= MaxRows); - } - - bool HasLimit() const override { - return MaxRows; - } - - std::shared_ptr GetBatch() { - if (Batch) { - return Batch->Slice(Offset, AddedRows); - } - return {}; - } - -private: - std::shared_ptr Batch; - size_t Offset = 0; - size_t MaxRows = 0; - size_t AddedRows = 0; -}; - -TMergingSortedInputStream::TMergingSortedInputStream(const 
std::vector& inputs, - std::shared_ptr description, - size_t maxBatchRows, bool slice) - : Description(description) - , MaxBatchSize(maxBatchRows) - , SliceSources(slice) - , SourceBatches(inputs.size()) - , Cursors(inputs.size()) -{ - Children.insert(Children.end(), inputs.begin(), inputs.end()); - Header = Children.at(0)->Schema(); -} - -/// Read the first blocks, initialize the queue. -void TMergingSortedInputStream::Init() { - Y_ABORT_UNLESS(First); - First = false; - size_t totalRows = 0; - for (size_t i = 0; i < SourceBatches.size(); ++i) { - auto& batch = SourceBatches[i]; - if (batch) { - continue; - } - - batch = Children[i]->Read(); - if (!batch || batch->num_rows() == 0) { - continue; - } - - for (i32 i = 0; i < batch->num_columns(); ++i) { - ColumnSize[batch->column_name(i)] += NArrow::GetArrayDataSize(batch->column(i)); - } - - totalRows += batch->num_rows(); - Cursors[i] = TSortCursorImpl(batch, Description, i); - } - - ExpectedBatchSize = MaxBatchSize ? std::min(totalRows, MaxBatchSize) : totalRows; - if (MaxBatchSize && MaxBatchSize < totalRows) { - ColumnSize.clear(); - } - - Queue = TSortingHeap(Cursors, Description->NotNull); - - /// Let's check that all source blocks have the same structure. 
- for (const auto& batch : SourceBatches) { - if (batch) { - Y_DEBUG_ABORT_UNLESS(batch->schema()->Equals(*Header)); - } - } -} - -std::shared_ptr TMergingSortedInputStream::ReadImpl() { - if (Finished) { - return {}; - } - - if (Children.size() == 1 && !Description->Replace()) { - return Children[0]->Read(); - } - - if (First) { - Init(); - } - - if (SliceSources) { - Y_DEBUG_ABORT_UNLESS(!Description->Reverse); - TSlicedRowsBuffer rowsBuffer(MaxBatchSize); - Merge(rowsBuffer, Queue); - auto batch = rowsBuffer.GetBatch(); - Y_ABORT_UNLESS(batch); - if (!batch->num_rows()) { - Y_ABORT_UNLESS(Finished); - return {}; - } - return batch; - } else { - auto builders = NArrow::MakeBuilders(Header, ExpectedBatchSize, ColumnSize); - if (builders.empty()) { - return {}; - } - - Y_ABORT_UNLESS(builders.size() == (size_t)Header->num_fields()); - TRowsBuffer rowsBuffer(builders, MaxBatchSize); - Merge(rowsBuffer, Queue); - - auto arrays = NArrow::Finish(std::move(builders)); - Y_ABORT_UNLESS(arrays.size()); - if (!arrays[0]->length()) { - Y_ABORT_UNLESS(Finished); - return {}; - } - return arrow::RecordBatch::Make(Header, arrays[0]->length(), arrays); - } -} - -/// Get the next block from the corresponding source, if there is one. 
-void TMergingSortedInputStream::FetchNextBatch(const TSortCursor& current, TSortingHeap& queue) { - size_t order = current->order; - Y_ABORT_UNLESS(order < Cursors.size() && &Cursors[order] == current.Impl); - - while (true) { - SourceBatches[order] = Children[order]->Read(); - auto& batch = SourceBatches[order]; - - if (!batch) { - queue.RemoveTop(); - break; - } - - if (batch->num_rows()) { - Y_DEBUG_ABORT_UNLESS(batch->schema()->Equals(*Header)); - - Cursors[order].Reset(batch); - queue.ReplaceTop(TSortCursor(&Cursors[order], Description->NotNull)); - break; - } - } -} - -/// Take rows in required order and put them into `rowBuffer`, -/// while the number of rows are no more than `max_block_size` -template -void TMergingSortedInputStream::MergeImpl(IRowsBuffer& rowsBuffer, TSortingHeap& queue) { - if constexpr (replace) { - if (!PrevKey && queue.IsValid()) { - auto current = queue.Current(); - PrevKey = std::make_shared(current->replace_columns, current->getRow()); - if (!rowsBuffer.AddRow(current)) { - return; - } - // Do not get Next() for simplicity. Lead to a dup - } - } - - while (queue.IsValid()) { - if constexpr (limit) { - if (rowsBuffer.Limit()) { - return; - } - } - - auto current = queue.Current(); - - if constexpr (replace) { - TReplaceKey key(current->replace_columns, current->getRow()); - - if (key == *PrevKey) { - // do nothing - } else if (rowsBuffer.AddRow(current)) { - *PrevKey = key; - } else { - return; - } - } else { - if (!rowsBuffer.AddRow(current)) { - return; - } - } - - if (!current->isLast()) { - queue.Next(); - } else { - rowsBuffer.Flush(); - FetchNextBatch(current, queue); - } - } - - /// We have read all data. Ask children to cancel providing more data. 
- Cancel(); - Finished = true; -} - -void TMergingSortedInputStream::Merge(IRowsBuffer& rowsBuffer, TSortingHeap& queue) { - const bool replace = Description->Replace(); - const bool limit = rowsBuffer.HasLimit(); - - if (replace) { - if (limit) { - MergeImpl(rowsBuffer, queue); - } else { - MergeImpl(rowsBuffer, queue); - } - } else { - if (limit) { - MergeImpl(rowsBuffer, queue); - } else { - MergeImpl(rowsBuffer, queue); - } - } - - rowsBuffer.Flush(); -} - -} diff --git a/ydb/core/formats/arrow/merging_sorted_input_stream.h b/ydb/core/formats/arrow/merging_sorted_input_stream.h deleted file mode 100644 index 2a3a2f722775..000000000000 --- a/ydb/core/formats/arrow/merging_sorted_input_stream.h +++ /dev/null @@ -1,54 +0,0 @@ -// The code in this file is based on original ClickHouse source code -// which is licensed under Apache license v2.0 -// See: https://github.com/ClickHouse/ClickHouse/ - -#pragma once -#include "input_stream.h" -#include "sort_cursor.h" - -namespace NKikimr::NArrow { - -struct IRowsBuffer { - virtual bool AddRow(const TSortCursor& cursor) = 0; - virtual void Flush() = 0; - virtual bool Limit() const = 0; - virtual bool HasLimit() const = 0; -}; - -/// Merges several sorted streams into one sorted stream. -class TMergingSortedInputStream : public IInputStream { -public: - TMergingSortedInputStream(const std::vector& inputs, - std::shared_ptr description, - size_t maxBatchRows, bool slice = false); - - std::shared_ptr Schema() const override { return Header; } - -protected: - std::shared_ptr ReadImpl() override; - -private: - std::shared_ptr Header; - std::shared_ptr Description; - const ui64 MaxBatchSize; - const bool SliceSources; - bool First = true; - bool Finished = false; - ui64 ExpectedBatchSize = 0; /// May be smaller or equal to max_block_size. To do 'reserve' for columns. 
- std::map ColumnSize; - - std::vector> SourceBatches; - std::shared_ptr PrevKey; - - std::vector Cursors; - TSortingHeap Queue; - - void Init(); - void FetchNextBatch(const TSortCursor& current, TSortingHeap& queue); - void Merge(IRowsBuffer& rowsBuffer, TSortingHeap& queue); - - template - void MergeImpl(IRowsBuffer& rowsBuffer, TSortingHeap& queue); -}; - -} diff --git a/ydb/core/formats/arrow/one_batch_input_stream.h b/ydb/core/formats/arrow/one_batch_input_stream.h deleted file mode 100644 index 647e70a3f6a7..000000000000 --- a/ydb/core/formats/arrow/one_batch_input_stream.h +++ /dev/null @@ -1,36 +0,0 @@ -// The code in this file is based on original ClickHouse source code -// which is licensed under Apache license v2.0 -// See: https://github.com/ClickHouse/ClickHouse/ - -#pragma once -#include "input_stream.h" - -namespace NKikimr::NArrow { - -class TOneBatchInputStream : public IInputStream { -public: - explicit TOneBatchInputStream(std::shared_ptr batch) - : Batch(batch) - , Header(Batch->schema()) - {} - - std::shared_ptr Schema() const override { - return Header; - } - -protected: - std::shared_ptr ReadImpl() override { - if (Batch) { - auto out = Batch; - Batch.reset(); - return out; - } - return {}; - } - -private: - std::shared_ptr Batch; - std::shared_ptr Header; -}; - -} diff --git a/ydb/core/formats/arrow/permutations.cpp b/ydb/core/formats/arrow/permutations.cpp index f1a68600bb70..ace1fad33663 100644 --- a/ydb/core/formats/arrow/permutations.cpp +++ b/ydb/core/formats/arrow/permutations.cpp @@ -1,5 +1,6 @@ -#include "arrow_helpers.h" #include "permutations.h" + +#include "arrow_helpers.h" #include "replace_key.h" #include "size_calcer.h" #include "hash/calcer.h" @@ -180,7 +181,10 @@ std::shared_ptr CopyRecords(const std::shared_ptr& s return result; } -bool THashConstructor::BuildHashUI64(std::shared_ptr& batch, const std::vector& fieldNames, const std::string& hashFieldName) { +namespace { + +template +bool BuildHashUI64Impl(std::shared_ptr& 
batch, const std::vector& fieldNames, const std::string& hashFieldName) { if (fieldNames.size() == 0) { return false; } @@ -193,26 +197,43 @@ bool THashConstructor::BuildHashUI64(std::shared_ptr& batch, } Y_ABORT_UNLESS(column); if (column->type()->id() == arrow::Type::UINT64 || column->type()->id() == arrow::Type::UINT32 || column->type()->id() == arrow::Type::INT64 || column->type()->id() == arrow::Type::INT32) { - batch = TStatusValidator::GetValid(batch->AddColumn(batch->num_columns(), hashFieldName, column)); + batch = TStatusValidator::GetValid(batch->AddColumn(batch->num_columns(), std::make_shared(hashFieldName, column->type()), column)); return true; } } - std::shared_ptr hashColumn = NArrow::NHash::TXX64(fieldNames, NArrow::NHash::TXX64::ENoColumnPolicy::Verify, 34323543).ExecuteToArray(batch, hashFieldName); - batch = TStatusValidator::GetValid(batch->AddColumn(batch->num_columns(), hashFieldName, hashColumn)); + std::shared_ptr hashColumn = NArrow::NHash::TXX64(fieldNames, NArrow::NHash::TXX64::ENoColumnPolicy::Verify, 34323543) + .ExecuteToArray(batch, hashFieldName); + batch = NAdapter::TDataBuilderPolicy::AddColumn(batch, std::make_shared(hashFieldName, hashColumn->type()), hashColumn); return true; } +} + +bool THashConstructor::BuildHashUI64(std::shared_ptr& batch, const std::vector& fieldNames, const std::string& hashFieldName) { + return BuildHashUI64Impl(batch, fieldNames, hashFieldName); +} + +bool THashConstructor::BuildHashUI64(std::shared_ptr& batch, const std::vector& fieldNames, const std::string& hashFieldName) { + return BuildHashUI64Impl(batch, fieldNames, hashFieldName); +} + ui64 TShardedRecordBatch::GetMemorySize() const { - return NArrow::GetBatchMemorySize(RecordBatch); + return NArrow::GetTableMemorySize(RecordBatch); +} + +TShardedRecordBatch::TShardedRecordBatch(const std::shared_ptr& batch) { + AFL_VERIFY(batch); + RecordBatch = TStatusValidator::GetValid(arrow::Table::FromRecordBatches(batch->schema(), {batch})); } 
-TShardedRecordBatch::TShardedRecordBatch(const std::shared_ptr& batch) + +TShardedRecordBatch::TShardedRecordBatch(const std::shared_ptr& batch) : RecordBatch(batch) { AFL_VERIFY(RecordBatch); } -TShardedRecordBatch::TShardedRecordBatch(const std::shared_ptr& batch, std::vector>&& splittedByShards) +TShardedRecordBatch::TShardedRecordBatch(const std::shared_ptr& batch, std::vector>&& splittedByShards) : RecordBatch(batch) , SplittedByShards(std::move(splittedByShards)) { @@ -220,13 +241,13 @@ TShardedRecordBatch::TShardedRecordBatch(const std::shared_ptr> TShardingSplitIndex::Apply(const std::shared_ptr& input) { +std::vector> TShardingSplitIndex::Apply(const std::shared_ptr& input) { AFL_VERIFY(input); AFL_VERIFY(input->num_rows() == RecordsCount); auto permutation = BuildPermutation(); - auto resultBatch = NArrow::TStatusValidator::GetValid(arrow::compute::Take(input, *permutation)).record_batch(); + auto resultBatch = NArrow::TStatusValidator::GetValid(arrow::compute::Take(input, *permutation)).table(); AFL_VERIFY(resultBatch->num_rows() == RecordsCount); - std::vector> result; + std::vector> result; ui64 startIndex = 0; for (auto&& i : Remapping) { result.emplace_back(resultBatch->Slice(startIndex, i.size())); @@ -236,7 +257,7 @@ std::vector> TShardingSplitIndex::Apply(cons return result; } -NKikimr::NArrow::TShardedRecordBatch TShardingSplitIndex::Apply(const ui32 shardsCount, const std::shared_ptr& input, const std::string& hashColumnName) { +NKikimr::NArrow::TShardedRecordBatch TShardingSplitIndex::Apply(const ui32 shardsCount, const std::shared_ptr& input, const std::string& hashColumnName) { AFL_VERIFY(input); if (shardsCount == 1) { return TShardedRecordBatch(input); @@ -261,6 +282,11 @@ NKikimr::NArrow::TShardedRecordBatch TShardingSplitIndex::Apply(const ui32 shard return TShardedRecordBatch(resultBatch, splitter->DetachRemapping()); } +TShardedRecordBatch TShardingSplitIndex::Apply(const ui32 shardsCount, const std::shared_ptr& input, const 
std::string& hashColumnName) { + return Apply(shardsCount, TStatusValidator::GetValid(arrow::Table::FromRecordBatches(input->schema(), {input})) + , hashColumnName); +} + std::shared_ptr TShardingSplitIndex::BuildPermutation() const { arrow::UInt64Builder builder; Y_ABORT_UNLESS(builder.Reserve(RecordsCount).ok()); @@ -282,4 +308,10 @@ std::shared_ptr ReverseRecords(const std::shared_ptr ReverseRecords(const std::shared_ptr& batch) { + AFL_VERIFY(batch); + auto permutation = NArrow::MakePermutation(batch->num_rows(), true); + return NArrow::TStatusValidator::GetValid(arrow::compute::Take(batch, permutation)).table(); +} + } diff --git a/ydb/core/formats/arrow/permutations.h b/ydb/core/formats/arrow/permutations.h index 584db8350888..73a433ee52a2 100644 --- a/ydb/core/formats/arrow/permutations.h +++ b/ydb/core/formats/arrow/permutations.h @@ -1,4 +1,7 @@ #pragma once +#include "arrow_helpers.h" + +#include #include #include #include @@ -7,14 +10,17 @@ namespace NKikimr::NArrow { class THashConstructor { public: + static bool BuildHashUI64(std::shared_ptr& batch, const std::vector& fieldNames, const std::string& hashFieldName); static bool BuildHashUI64(std::shared_ptr& batch, const std::vector& fieldNames, const std::string& hashFieldName); + }; class TShardedRecordBatch { private: - YDB_READONLY_DEF(std::shared_ptr, RecordBatch); + YDB_READONLY_DEF(std::shared_ptr, RecordBatch); YDB_READONLY_DEF(std::vector>, SplittedByShards); public: + TShardedRecordBatch(const std::shared_ptr& batch); TShardedRecordBatch(const std::shared_ptr& batch); void Cut(const ui32 limit) { @@ -31,11 +37,7 @@ class TShardedRecordBatch { return SplittedByShards.size() > 1; } - TShardedRecordBatch(const std::shared_ptr& batch, std::vector>&& splittedByShards); - - void StripColumns(const std::shared_ptr& schema) { - RecordBatch = NArrow::ExtractColumns(RecordBatch, schema); - } + TShardedRecordBatch(const std::shared_ptr& batch, std::vector>&& splittedByShards); ui64 GetMemorySize() const; 
@@ -74,17 +76,22 @@ class TShardingSplitIndex { } template - void Initialize(const TIntArrowArray& arrowHashArray) { + void Initialize(const arrow::ChunkedArray& arrowHashArrayChunked) { Y_ABORT_UNLESS(ShardsCount); Remapping.resize(ShardsCount); - const ui32 expectation = arrowHashArray.length() / ShardsCount + 1; + const ui32 expectation = arrowHashArrayChunked.length() / ShardsCount + 1; for (auto&& i : Remapping) { i.reserve(2 * expectation); } - for (ui64 i = 0; i < (ui64)arrowHashArray.length(); ++i) { - const i64 v = arrowHashArray.GetView(i); - const ui32 idx = ((v < 0) ? (-v) : v) % ShardsCount; - Remapping[idx].emplace_back(i); + ui64 offset = 0; /* index of the current chunk's first row within the whole chunked array; declared outside the chunk loop so it accumulates across chunks */ + for (auto&& arrowHashArrayAbstract : arrowHashArrayChunked.chunks()) { + auto& arrowHashArray = static_cast(*arrowHashArrayAbstract); + for (ui64 i = 0; i < (ui64)arrowHashArray.length(); ++i) { + const i64 v = arrowHashArray.GetView(i); + const ui32 idx = ((v < 0) ? (-v) : v) % ShardsCount; + Remapping[idx].emplace_back(offset + i); + } + offset += (ui64)arrowHashArray.length(); } std::deque*> sizeCorrection; for (auto&& i : Remapping) { @@ -112,7 +119,7 @@ class TShardingSplitIndex { } } - TShardingSplitIndex(const ui32 shardsCount, const arrow::Array& arrowHashArray) + TShardingSplitIndex(const ui32 shardsCount, const arrow::ChunkedArray& arrowHashArray) : ShardsCount(shardsCount) , RecordsCount(arrowHashArray.length()) { } @@ -124,16 +131,16 @@ class TShardingSplitIndex { } template - static TShardingSplitIndex Build(const ui32 shardsCount, const arrow::Array& arrowHashArray) { + static TShardingSplitIndex Build(const ui32 shardsCount, const arrow::ChunkedArray& arrowHashArray) { TShardingSplitIndex result(shardsCount, arrowHashArray); - result.Initialize(static_cast(arrowHashArray)); + result.Initialize(arrowHashArray); return result; } std::shared_ptr BuildPermutation() const; - std::vector> Apply(const std::shared_ptr& input); - + std::vector> Apply(const std::shared_ptr& input); + static
TShardedRecordBatch Apply(const ui32 shardsCount, const std::shared_ptr& input, const std::string& hashColumnName); static TShardedRecordBatch Apply(const ui32 shardsCount, const std::shared_ptr& input, const std::string& hashColumnName); }; @@ -142,6 +149,7 @@ std::shared_ptr MakeFilterPermutation(const std::vector MakeFilterPermutation(const std::vector& indexes); std::shared_ptr MakeSortPermutation(const std::shared_ptr& batch, const std::shared_ptr& sortingKey, const bool andUnique); std::shared_ptr ReverseRecords(const std::shared_ptr& batch); +std::shared_ptr ReverseRecords(const std::shared_ptr& batch); std::shared_ptr CopyRecords(const std::shared_ptr& source, const std::vector& indexes); std::shared_ptr CopyRecords(const std::shared_ptr& source, const std::vector& indexes); diff --git a/ydb/core/formats/arrow/program.cpp b/ydb/core/formats/arrow/program.cpp index 6523d12b2c38..f84435bdefd5 100644 --- a/ydb/core/formats/arrow/program.cpp +++ b/ydb/core/formats/arrow/program.cpp @@ -33,6 +33,7 @@ struct GroupByOptions: public arrow::compute::ScalarAggregateOptions { }; } #endif +#include "common/container.h" #include #include @@ -148,11 +149,11 @@ class TKernelFunction : public IStepFunction { if (!arguments) { return arrow::Status::Invalid("Error parsing args."); } -// try { + try { return Function->Execute(*arguments, assign.GetOptions(), TBase::Ctx); -// } catch (const std::exception& ex) { -// return arrow::Status::ExecutionError(ex.what()); -// } + } catch (const std::exception& ex) { + return arrow::Status::ExecutionError(ex.what()); + } } }; @@ -552,13 +553,36 @@ arrow::Result TDatumBatch::GetColumnByName(const std::string& name return Datums[i]; } +std::shared_ptr TDatumBatch::ToTable() const { + std::vector> columns; + columns.reserve(Datums.size()); + for (auto col : Datums) { + if (col.is_scalar()) { + columns.push_back(std::make_shared(NArrow::TStatusValidator::GetValid(arrow::MakeArrayFromScalar(*col.scalar(), Rows)))); + } else if 
(col.is_array()) { + if (col.length() == -1) { + return {}; + } + columns.push_back(std::make_shared(col.make_array())); + } else if (col.is_arraylike()) { + if (col.length() == -1) { + return {}; + } + columns.push_back(col.chunked_array()); + } else { + AFL_VERIFY(false); + } + } + return arrow::Table::Make(Schema, columns, Rows); +} + std::shared_ptr TDatumBatch::ToRecordBatch() const { std::vector> columns; columns.reserve(Datums.size()); for (auto col : Datums) { if (col.is_scalar()) { - columns.push_back(*arrow::MakeArrayFromScalar(*col.scalar(), Rows)); - } else if (col.is_array()){ + columns.push_back(NArrow::TStatusValidator::GetValid(arrow::MakeArrayFromScalar(*col.scalar(), Rows))); + } else if (col.is_array()) { if (col.length() == -1) { return {}; } @@ -838,10 +862,30 @@ arrow::Status TProgramStep::ApplyProjection(std::shared_ptr& arrow::Status TProgramStep::Apply(std::shared_ptr& batch, arrow::compute::ExecContext* ctx) const { auto rb = TDatumBatch::FromRecordBatch(batch); - NArrow::TStatusValidator::Validate(ApplyAssignes(*rb, ctx)); - NArrow::TStatusValidator::Validate(ApplyFilters(*rb)); - NArrow::TStatusValidator::Validate(ApplyAggregates(*rb, ctx)); - NArrow::TStatusValidator::Validate(ApplyProjection(*rb)); + { + auto status = ApplyAssignes(*rb, ctx); + if (!status.ok()) { + return status; + } + } + { + auto status = ApplyFilters(*rb); + if (!status.ok()) { + return status; + } + } + { + auto status = ApplyAggregates(*rb, ctx); + if (!status.ok()) { + return status; + } + } + { + auto status = ApplyProjection(*rb); + if (!status.ok()) { + return status; + } + } batch = (*rb).ToRecordBatch(); if (!batch) { @@ -850,20 +894,28 @@ arrow::Status TProgramStep::Apply(std::shared_ptr& batch, ar return arrow::Status::OK(); } -std::set TProgramStep::GetColumnsInUsage() const { +std::set TProgramStep::GetColumnsInUsage(const bool originalOnly/* = false*/) const { std::set result; for (auto&& i : Filters) { - result.emplace(i.GetColumnName()); + if 
(!originalOnly || !i.IsGenerated()) { + result.emplace(i.GetColumnName()); + } } for (auto&& i : Assignes) { for (auto&& f : i.GetArguments()) { - result.emplace(f.GetColumnName()); + if (!originalOnly || !f.IsGenerated()) { + result.emplace(f.GetColumnName()); + } } } return result; } -std::shared_ptr TProgramStep::BuildFilter(const std::shared_ptr& t) const { +arrow::Result> TProgramStep::BuildFilter(const std::shared_ptr& t) const { + return BuildFilter(t->BuildTable(GetColumnsInUsage(true))); +} + +arrow::Result> TProgramStep::BuildFilter(const std::shared_ptr& t) const { if (Filters.empty()) { return nullptr; } @@ -871,7 +923,12 @@ std::shared_ptr TProgramStep::BuildFilter(const std::shar NArrow::TColumnFilter fullLocal = NArrow::TColumnFilter::BuildAllowFilter(); for (auto&& rb : batches) { auto datumBatch = TDatumBatch::FromRecordBatch(rb); - NArrow::TStatusValidator::Validate(ApplyAssignes(*datumBatch, NArrow::GetCustomExecContext())); + { + auto statusAssign = ApplyAssignes(*datumBatch, NArrow::GetCustomExecContext()); + if (!statusAssign.ok()) { + return statusAssign; + } + } NArrow::TColumnFilter local = NArrow::TColumnFilter::BuildAllowFilter(); NArrow::TStatusValidator::Validate(MakeCombinedFilter(*datumBatch, local)); AFL_VERIFY(local.Size() == datumBatch->Rows)("local", local.Size())("datum", datumBatch->Rows); @@ -906,30 +963,4 @@ std::set TProgram::GetProcessingColumns() const { return result; } -std::shared_ptr TProgram::ApplyEarlyFilter(std::shared_ptr& batch, const bool useFilter) const { - std::shared_ptr filter = std::make_shared(NArrow::TColumnFilter::BuildAllowFilter()); - for (ui32 i = 0; i < Steps.size(); ++i) { - try { - auto& step = Steps[i]; - if (!step->IsFilterOnly()) { - break; - } - - std::shared_ptr local = step->BuildFilter(batch); - AFL_VERIFY(local); - if (!useFilter) { - *filter = filter->And(*local); - } else { - *filter = filter->CombineSequentialAnd(*local); - if (!local->Apply(batch)) { - break; - } - } - } catch (const 
std::exception& ex) { - AFL_VERIFY(false); - } - } - return filter; -} - } diff --git a/ydb/core/formats/arrow/program.h b/ydb/core/formats/arrow/program.h index 000bd447b1e5..ed5f13403d02 100644 --- a/ydb/core/formats/arrow/program.h +++ b/ydb/core/formats/arrow/program.h @@ -44,6 +44,7 @@ struct TDatumBatch { arrow::Status AddColumn(const std::string& name, arrow::Datum&& column); arrow::Result GetColumnByName(const std::string& name) const; + std::shared_ptr ToTable() const; std::shared_ptr ToRecordBatch() const; static std::shared_ptr FromRecordBatch(const std::shared_ptr& batch); static std::shared_ptr FromTable(const std::shared_ptr& batch); @@ -353,7 +354,7 @@ class TProgramStep { return sb; } - std::set GetColumnsInUsage() const; + std::set GetColumnsInUsage(const bool originalOnly = false) const; const std::set& GetFilterOriginalColumnIds() const; @@ -390,7 +391,7 @@ class TProgramStep { arrow::Status Apply(std::shared_ptr& batch, arrow::compute::ExecContext* ctx) const; - arrow::Status ApplyAssignes(TDatumBatch& batch, arrow::compute::ExecContext* ctx) const; + [[nodiscard]] arrow::Status ApplyAssignes(TDatumBatch& batch, arrow::compute::ExecContext* ctx) const; arrow::Status ApplyAggregates(TDatumBatch& batch, arrow::compute::ExecContext* ctx) const; arrow::Status ApplyFilters(TDatumBatch& batch) const; arrow::Status ApplyProjection(std::shared_ptr& batch) const; @@ -402,7 +403,8 @@ class TProgramStep { return Filters.size() && (!GroupBy.size() && !GroupByKeys.size()); } - std::shared_ptr BuildFilter(const std::shared_ptr& t) const; + [[nodiscard]] arrow::Result> BuildFilter(const std::shared_ptr& t) const; + [[nodiscard]] arrow::Result> BuildFilter(const std::shared_ptr& t) const; }; struct TProgram { @@ -416,6 +418,18 @@ struct TProgram { : Steps(std::move(steps)) {} + arrow::Status ApplyTo(std::shared_ptr& table, arrow::compute::ExecContext* ctx) const { + std::vector> batches = NArrow::SliceToRecordBatches(table); + for (auto&& i : batches) { + auto 
status = ApplyTo(i, ctx); + if (!status.ok()) { + return status; + } + } + table = NArrow::TStatusValidator::GetValid(arrow::Table::FromRecordBatches(batches)); + return arrow::Status::OK(); + } + arrow::Status ApplyTo(std::shared_ptr& batch, arrow::compute::ExecContext* ctx) const { try { for (auto& step : Steps) { @@ -432,7 +446,6 @@ struct TProgram { std::set GetEarlyFilterColumns() const; std::set GetProcessingColumns() const; - std::shared_ptr ApplyEarlyFilter(std::shared_ptr& batch, const bool useFilter) const; TString DebugString() const { TStringBuilder sb; sb << "["; @@ -444,11 +457,17 @@ struct TProgram { } }; +inline arrow::Status ApplyProgram( + std::shared_ptr& batch, + const TProgram& program, + arrow::compute::ExecContext* ctx = nullptr) { + return program.ApplyTo(batch, ctx); +} + inline arrow::Status ApplyProgram( std::shared_ptr& batch, const TProgram& program, - arrow::compute::ExecContext* ctx = nullptr) -{ + arrow::compute::ExecContext* ctx = nullptr) { return program.ApplyTo(batch, ctx); } diff --git a/ydb/core/formats/arrow/reader/batch_iterator.cpp b/ydb/core/formats/arrow/reader/batch_iterator.cpp new file mode 100644 index 000000000000..5216691dd208 --- /dev/null +++ b/ydb/core/formats/arrow/reader/batch_iterator.cpp @@ -0,0 +1,51 @@ +#include "batch_iterator.h" + +namespace NKikimr::NArrow::NMerger { + +NJson::TJsonValue TBatchIterator::DebugJson() const { + NJson::TJsonValue result; + result["is_cp"] = IsControlPoint(); + result["key"] = KeyColumns.DebugJson(); + return result; +} + +NKikimr::NArrow::NMerger::TSortableBatchPosition::TFoundPosition TBatchIterator::SkipToLower(const TSortableBatchPosition& pos) { + const ui32 posStart = KeyColumns.GetPosition(); + auto result = KeyColumns.SkipToLower(pos); + const i32 delta = IsReverse() ? 
(posStart - KeyColumns.GetPosition()) : (KeyColumns.GetPosition() - posStart); + AFL_VERIFY(delta >= 0); + AFL_VERIFY(VersionColumns.InitPosition(KeyColumns.GetPosition()))("pos", KeyColumns.GetPosition()) + ("size", VersionColumns.GetRecordsCount())("key_size", KeyColumns.GetRecordsCount()); + if (FilterIterator && delta) { + AFL_VERIFY(FilterIterator->Next(delta)); + } + return result; +} + +bool TBatchIterator::Next() { + const bool result = KeyColumns.NextPosition(ReverseSortKff) && VersionColumns.NextPosition(ReverseSortKff); + if (FilterIterator) { + Y_ABORT_UNLESS(result == FilterIterator->Next(1)); + } + return result; +} + +bool TBatchIterator::operator<(const TBatchIterator& item) const { + const std::partial_ordering result = KeyColumns.Compare(item.KeyColumns); + if (result == std::partial_ordering::equivalent) { + if (IsControlPoint() && item.IsControlPoint()) { + return false; + } else if (IsControlPoint()) { + return false; + } else if (item.IsControlPoint()) { + return true; + } + //don't need inverse through we need maximal version at first (reverse analytic not included in VersionColumns) + return VersionColumns.Compare(item.VersionColumns) == std::partial_ordering::less; + } else { + //inverse logic through we use max heap, but need minimal element if not reverse (reverse analytic included in KeyColumns) + return result == std::partial_ordering::greater; + } +} + +} diff --git a/ydb/core/formats/arrow/reader/batch_iterator.h b/ydb/core/formats/arrow/reader/batch_iterator.h new file mode 100644 index 000000000000..eec3559eb2b9 --- /dev/null +++ b/ydb/core/formats/arrow/reader/batch_iterator.h @@ -0,0 +1,89 @@ +#pragma once +#include "position.h" +#include + +namespace NKikimr::NArrow::NMerger { + +class TBatchIterator { +private: + bool ControlPointFlag; + TSortableBatchPosition KeyColumns; + TSortableBatchPosition VersionColumns; + i64 RecordsCount; + int ReverseSortKff; + + std::shared_ptr Filter; + std::shared_ptr FilterIterator; + + i32 
GetFirstPosition() const { + if (ReverseSortKff > 0) { + return 0; + } else { + return RecordsCount - 1; + } + } + +public: + NJson::TJsonValue DebugJson() const; + + const std::shared_ptr& GetFilter() const { + return Filter; + } + + bool IsControlPoint() const { + return ControlPointFlag; + } + + const TSortableBatchPosition& GetKeyColumns() const { + return KeyColumns; + } + + const TSortableBatchPosition& GetVersionColumns() const { + return VersionColumns; + } + + TBatchIterator(const TSortableBatchPosition& keyColumns) + : ControlPointFlag(true) + , KeyColumns(keyColumns) { + + } + + template + TBatchIterator(std::shared_ptr batch, std::shared_ptr filter, + const std::vector& keyColumns, const std::vector& dataColumns, const bool reverseSort, const std::vector& versionColumnNames) + : ControlPointFlag(false) + , KeyColumns(batch, 0, keyColumns, dataColumns, reverseSort) + , VersionColumns(batch, 0, versionColumnNames, {}, false) + , RecordsCount(batch->num_rows()) + , ReverseSortKff(reverseSort ? 
-1 : 1) // -1 walks the batch backwards, +1 forwards
+        , Filter(filter) {
+        Y_ABORT_UNLESS(KeyColumns.InitPosition(GetFirstPosition())); // both cursors start at the first record in scan order
+        Y_ABORT_UNLESS(VersionColumns.InitPosition(GetFirstPosition()));
+        if (Filter) {
+            FilterIterator = std::make_shared(Filter->GetIterator(reverseSort, RecordsCount)); // iterator exists only when a filter was supplied
+        }
+    }
+
+    bool CheckNextBatch(const TBatchIterator& nextIterator) { // true when this iterator's current key orders strictly before nextIterator's
+        return KeyColumns.Compare(nextIterator.KeyColumns) == std::partial_ordering::less;
+    }
+
+    bool IsReverse() const {
+        return ReverseSortKff < 0;
+    }
+
+    bool IsDeleted() const { // a record is deleted when the filter rejects it; without a filter nothing is deleted
+        if (!FilterIterator) {
+            return false;
+        }
+        return !FilterIterator->GetCurrentAcceptance();
+    }
+
+    TSortableBatchPosition::TFoundPosition SkipToLower(const TSortableBatchPosition& pos);
+
+    bool Next();
+
+    bool operator<(const TBatchIterator& item) const;
+};
+
+}
diff --git a/ydb/core/formats/arrow/reader/heap.cpp b/ydb/core/formats/arrow/reader/heap.cpp
new file mode 100644
index 000000000000..33f6a65369ec
--- /dev/null
+++ b/ydb/core/formats/arrow/reader/heap.cpp
@@ -0,0 +1,5 @@
+#include "heap.h"
+
+namespace NKikimr::NArrow::NMerger {
+
+}
diff --git a/ydb/core/formats/arrow/reader/heap.h b/ydb/core/formats/arrow/reader/heap.h
new file mode 100644
index 000000000000..058066e9e7cb
--- /dev/null
+++ b/ydb/core/formats/arrow/reader/heap.h
@@ -0,0 +1,126 @@
+#pragma once
+#include
+
+#include
+#include
+
+namespace NKikimr::NArrow::NMerger {
+
+class TRecordBatchBuilder;
+
+template
+class TSortingHeap { // binary heap of sort cursors stored in a vector and ordered by the cursor's operator<
+public:
+    TSortingHeap() = default;
+
+    template
+    TSortingHeap(TCursors& cursors, bool notNull) { // wraps every non-empty cursor and heapifies the result
+        Queue.reserve(cursors.size());
+        for (auto& cur : cursors) {
+            if (!cur.Empty()) {
+                Queue.emplace_back(TSortCursor(&cur, notNull));
+            }
+        }
+        std::make_heap(Queue.begin(), Queue.end());
+    }
+
+    const TSortCursor& Current() const { return Queue.front(); } // heap top; the queue must not be empty
+    TSortCursor& MutableCurrent() { return Queue.front(); } // after mutating the top, call UpdateTop() or RemoveTop()
+    size_t Size() const { return Queue.size(); }
+    bool Empty() const { return Queue.empty(); }
+    TSortCursor& NextChild() { return
Queue[NextChildIndex()]; }
+
+    void Next() { // advances the top cursor; re-sifts it if it still has records, drops it otherwise
+        Y_ABORT_UNLESS(Size());
+
+        if (Queue.front().Next()) {
+            UpdateTop();
+        } else {
+            RemoveTop();
+        }
+    }
+
+    void RemoveTop() { // removes the heap top; the queue must not be empty
+        std::pop_heap(Queue.begin(), Queue.end());
+        Queue.pop_back();
+        NextIdx = 0; // heap layout changed: drop the cached larger-child index
+    }
+
+    void Push(TSortCursor&& cursor) { // takes ownership of the cursor and restores heap order
+        Queue.emplace_back(std::move(cursor)); // a named rvalue reference is an lvalue: move explicitly to avoid copying the cursor
+        std::push_heap(Queue.begin(), Queue.end());
+        NextIdx = 0; // heap layout changed: drop the cached larger-child index
+    }
+
+    NJson::TJsonValue DebugJson() const { // JSON array with DebugJson() of every cursor, in storage order
+        NJson::TJsonValue result = NJson::JSON_ARRAY;
+        for (auto&& i : Queue) {
+            result.AppendValue(i.DebugJson());
+        }
+        return result;
+    }
+
+    /// This is an adapted version of the function __sift_down from libc++.
+    /// Why not simply use std::priority_queue?
+    /// - because it doesn't support updating the top element and requires pop and push instead.
+    /// Also look at the "Boost.Heap" library.
+    void UpdateTop() {
+        size_t size = Queue.size();
+        if (size < 2)
+            return;
+
+        auto begin = Queue.begin();
+
+        size_t child_idx = NextChildIndex();
+        auto child_it = begin + child_idx;
+
+        /// Check if we are in order.
+        if (*child_it < *begin)
+            return;
+
+        NextIdx = 0;
+
+        auto curr_it = begin;
+        auto top(std::move(*begin));
+        do {
+            /// We are not in heap-order, swap the parent with its largest child.
+            *curr_it = std::move(*child_it);
+            curr_it = child_it;
+
+            // recompute the child based off of the updated parent
+            child_idx = 2 * child_idx + 1;
+
+            if (child_idx >= size)
+                break;
+
+            child_it = begin + child_idx;
+
+            if ((child_idx + 1) < size && *child_it < *(child_it + 1)) {
+                /// Right child exists and is greater than left child.
+                ++child_it;
+                ++child_idx;
+            }
+
+            /// Check if we are in order.
+        } while (!(*child_it < top));
+        *curr_it = std::move(top);
+    }
+private:
+    std::vector Queue;
+    /// Cache comparison between first and second child if the order in queue has not been changed.
+    size_t NextIdx = 0; // 0 means "not cached"; otherwise 1 or 2, the index of the root's larger child
+
+    size_t NextChildIndex() { // index of the root's larger child, computed once and cached; yields 1 even when no child exists, so callers must check the size first
+        if (NextIdx == 0) {
+            NextIdx = 1;
+            if (Queue.size() > 2 && Queue[1] < Queue[2]) {
+                ++NextIdx;
+            }
+        }
+
+        return NextIdx;
+    }
+
+};
+
+}
diff --git a/ydb/core/tx/columnshard/engines/reader/read_filter_merger.cpp b/ydb/core/formats/arrow/reader/merger.cpp
similarity index 71%
rename from ydb/core/tx/columnshard/engines/reader/read_filter_merger.cpp
rename to ydb/core/formats/arrow/reader/merger.cpp
index 0ac89d8ef3bc..83a43630f42d 100644
--- a/ydb/core/tx/columnshard/engines/reader/read_filter_merger.cpp
+++ b/ydb/core/formats/arrow/reader/merger.cpp
@@ -1,9 +1,8 @@ -#include "read_filter_merger.h" -#include -#include -#include +#include "merger.h" +#include "result_builder.h" +#include -namespace NKikimr::NOlap::NIndexedReader { +namespace NKikimr::NArrow::NMerger { void TMergePartialStream::PutControlPoint(std::shared_ptr point) { Y_ABORT_UNLESS(point); @@ -14,24 +13,6 @@ void TMergePartialStream::PutControlPoint(std::shared_ptr batch, std::shared_ptr filter) { - if (!batch || !batch->num_rows()) { - return; - } - Y_DEBUG_ABORT_UNLESS(NArrow::IsSorted(batch, SortSchema)); - AddNewToHeap(batch, filter); -} - -void TMergePartialStream::AddNewToHeap(std::shared_ptr batch, std::shared_ptr filter) { - if (!filter || filter->IsTotalAllowFilter()) { - SortHeap.Push(TBatchIterator(batch, nullptr, SortSchema->field_names(), DataSchema ? DataSchema->field_names() : std::vector(), Reverse)); - } else if (filter->IsTotalDenyFilter()) { - return; - } else { - SortHeap.Push(TBatchIterator(batch, filter, SortSchema->field_names(), DataSchema ?
DataSchema->field_names() : std::vector(), Reverse)); - } -} - void TMergePartialStream::RemoveControlPoint() { Y_ABORT_UNLESS(ControlPoints == 1); Y_ABORT_UNLESS(ControlPointEnriched()); @@ -56,16 +37,17 @@ void TMergePartialStream::CheckSequenceInDebug(const TSortableBatchPosition& nex #endif } -bool TMergePartialStream::DrainCurrentTo(TRecordBatchBuilder& builder, const TSortableBatchPosition& readTo, const bool includeFinish, std::optional* lastResultPosition) { +bool TMergePartialStream::DrainToControlPoint(TRecordBatchBuilder& builder, const bool includeFinish, std::optional* lastResultPosition) { + AFL_VERIFY(ControlPoints == 1); Y_ABORT_UNLESS((ui32)DataSchema->num_fields() == builder.GetBuildersCount()); builder.ValidateDataSchema(DataSchema); - PutControlPoint(std::make_shared(readTo)); bool cpReachedFlag = false; - while (SortHeap.Size() && !cpReachedFlag) { + while (SortHeap.Size() && !cpReachedFlag && !builder.IsBufferExhausted()) { if (SortHeap.Current().IsControlPoint()) { + auto keyColumns = SortHeap.Current().GetKeyColumns(); RemoveControlPoint(); cpReachedFlag = true; - if (SortHeap.Empty() || !includeFinish || SortHeap.Current().GetKeyColumns().Compare(readTo) == std::partial_ordering::greater) { + if (SortHeap.Empty() || !includeFinish || SortHeap.Current().GetKeyColumns().Compare(keyColumns) == std::partial_ordering::greater) { return true; } } @@ -78,11 +60,16 @@ bool TMergePartialStream::DrainCurrentTo(TRecordBatchBuilder& builder, const TSo } } } - return false; + return cpReachedFlag; +} + +bool TMergePartialStream::DrainCurrentTo(TRecordBatchBuilder& builder, const TSortableBatchPosition& readTo, const bool includeFinish, std::optional* lastResultPosition) { + PutControlPoint(std::make_shared(readTo)); + return DrainToControlPoint(builder, includeFinish, lastResultPosition); } -std::shared_ptr TMergePartialStream::SingleSourceDrain(const TSortableBatchPosition& readTo, const bool includeFinish, std::optional* lastResultPosition) { - 
std::shared_ptr result; +std::shared_ptr TMergePartialStream::SingleSourceDrain(const TSortableBatchPosition& readTo, const bool includeFinish, std::optional* lastResultPosition) { + std::shared_ptr result; if (SortHeap.Empty()) { return result; } @@ -147,14 +134,14 @@ std::shared_ptr TMergePartialStream::SingleSourceDrain(const SortHeap.UpdateTop(); } if (SortHeap.Empty()) { - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("pos", readTo.DebugJson().GetStringRobust())("heap", "EMPTY"); + AFL_DEBUG(NKikimrServices::ARROW_HELPER)("pos", readTo.DebugJson().GetStringRobust())("heap", "EMPTY"); } else { - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("pos", readTo.DebugJson().GetStringRobust())("heap", SortHeap.Current().GetKeyColumns().DebugJson().GetStringRobust()); + AFL_DEBUG(NKikimrServices::ARROW_HELPER)("pos", readTo.DebugJson().GetStringRobust())("heap", SortHeap.Current().GetKeyColumns().DebugJson().GetStringRobust()); } return result; } -bool TMergePartialStream::DrainAll(TRecordBatchBuilder& builder) { +void TMergePartialStream::DrainAll(TRecordBatchBuilder& builder) { Y_ABORT_UNLESS((ui32)DataSchema->num_fields() == builder.GetBuildersCount()); while (SortHeap.Size()) { if (auto currentPosition = DrainCurrentPosition()) { @@ -162,7 +149,6 @@ bool TMergePartialStream::DrainAll(TRecordBatchBuilder& builder) { builder.AddRecord(*currentPosition); } } - return false; } std::optional TMergePartialStream::DrainCurrentPosition() { @@ -197,14 +183,14 @@ std::vector> TMergePartialStream::DrainAllPa { std::vector> result; for (auto&& i : positions) { - NIndexedReader::TRecordBatchBuilder indexesBuilder(resultFields); + TRecordBatchBuilder indexesBuilder(resultFields); DrainCurrentTo(indexesBuilder, i.first, i.second); result.emplace_back(indexesBuilder.Finalize()); if (result.back()->num_rows() == 0) { result.pop_back(); } } - NIndexedReader::TRecordBatchBuilder indexesBuilder(resultFields); + TRecordBatchBuilder indexesBuilder(resultFields); DrainAll(indexesBuilder); 
result.emplace_back(indexesBuilder.Finalize()); if (result.back()->num_rows() == 0) { @@ -213,25 +199,33 @@ std::vector> TMergePartialStream::DrainAllPa return result; } -NJson::TJsonValue TMergePartialStream::TBatchIterator::DebugJson() const { - NJson::TJsonValue result; - result["is_cp"] = IsControlPoint(); - result["key"] = KeyColumns.DebugJson(); - return result; -} - -void TRecordBatchBuilder::ValidateDataSchema(const std::shared_ptr& schema) { - AFL_VERIFY(IsSameFieldsSequence(schema->fields(), Fields)); -} - -void TRecordBatchBuilder::AddRecord(const TSortableBatchPosition& position) { - Y_DEBUG_ABORT_UNLESS(position.GetData().GetColumns().size() == Builders.size()); - Y_DEBUG_ABORT_UNLESS(IsSameFieldsSequence(position.GetData().GetFields(), Fields)); -// AFL_TRACE(NKikimrServices::TX_COLUMNSHARD)("event", "record_add_on_read")("record", position.DebugJson()); - for (ui32 i = 0; i < position.GetData().GetColumns().size(); ++i) { - NArrow::Append(*Builders[i], *position.GetData().GetColumns()[i], position.GetPosition()); +void TMergePartialStream::SkipToLowerBound(const TSortableBatchPosition& pos, const bool include) { + if (SortHeap.Empty()) { + return; + } + AFL_DEBUG(NKikimrServices::ARROW_HELPER)("pos", pos.DebugJson().GetStringRobust())("heap", SortHeap.Current().GetKeyColumns().DebugJson().GetStringRobust()); + while (!SortHeap.Empty()) { + const auto cmpResult = SortHeap.Current().GetKeyColumns().Compare(pos); + if (cmpResult == std::partial_ordering::greater) { + break; + } + if (cmpResult == std::partial_ordering::equivalent && include) { + break; + } + const TSortableBatchPosition::TFoundPosition skipPos = SortHeap.MutableCurrent().SkipToLower(pos); + AFL_DEBUG(NKikimrServices::ARROW_HELPER)("pos", pos.DebugJson().GetStringRobust())("heap", SortHeap.Current().GetKeyColumns().DebugJson().GetStringRobust()); + if (skipPos.IsEqual()) { + if (!include && !SortHeap.MutableCurrent().Next()) { + SortHeap.RemoveTop(); + } else { + SortHeap.UpdateTop(); + 
} + } else if (skipPos.IsLess()) { + SortHeap.RemoveTop(); + } else { + SortHeap.UpdateTop(); + } } - ++RecordsCount; } }
diff --git a/ydb/core/formats/arrow/reader/merger.h b/ydb/core/formats/arrow/reader/merger.h
new file mode 100644
index 000000000000..5ff296cae0e8
--- /dev/null
+++ b/ydb/core/formats/arrow/reader/merger.h
@@ -0,0 +1,103 @@
+#pragma once
+#include "position.h"
+#include "heap.h"
+#include "result_builder.h"
+#include "batch_iterator.h"
+
+#include
+
+namespace NKikimr::NArrow::NMerger {
+
+class TMergePartialStream { // merges several sorted batches through a heap of TBatchIterator
+private:
+#ifndef NDEBUG
+    std::optional CurrentKeyColumns; // debug builds only; reported by DebugJson() when set
+#endif
+    bool PossibleSameVersionFlag = true; // changed through SetPossibleSameVersion()
+
+    std::shared_ptr SortSchema; // required and non-empty (checked in the constructor)
+    std::shared_ptr DataSchema; // may be null; when set it must have at least one field
+    const bool Reverse;
+    const std::vector VersionColumnNames;
+    ui32 ControlPoints = 0; // presumably the number of control points currently in the heap - confirm in merger.cpp
+
+    TSortingHeap SortHeap;
+
+    NJson::TJsonValue DebugJson() const { // JSON map with the heap contents and, in debug builds, the current key
+        NJson::TJsonValue result = NJson::JSON_MAP;
+#ifndef NDEBUG
+        if (CurrentKeyColumns) {
+            result["current"] = CurrentKeyColumns->DebugJson();
+        }
+#endif
+        result.InsertValue("heap", SortHeap.DebugJson());
+        return result;
+    }
+
+    std::optional DrainCurrentPosition();
+
+    void CheckSequenceInDebug(const TSortableBatchPosition& nextKeyColumnsPosition);
+public:
+    TMergePartialStream(std::shared_ptr sortSchema, std::shared_ptr dataSchema, const bool reverse, const std::vector& versionColumnNames)
+        : SortSchema(sortSchema)
+        , DataSchema(dataSchema)
+        , Reverse(reverse)
+        , VersionColumnNames(versionColumnNames)
+    {
+        Y_ABORT_UNLESS(SortSchema);
+        Y_ABORT_UNLESS(SortSchema->num_fields());
+        Y_ABORT_UNLESS(!DataSchema || DataSchema->num_fields()); // an absent data schema is allowed, an empty one is not
+    }
+
+    void SkipToLowerBound(const TSortableBatchPosition& pos, const bool include);
+
+    void SetPossibleSameVersion(const bool value) {
+        PossibleSameVersionFlag = value;
+    }
+
+    bool IsValid() const { // true while the heap holds at least one element
+        return SortHeap.Size();
+    }
+
+    ui32 GetSourcesCount() const { // number of elements currently in the heap
+        return SortHeap.Size();
+    }
+
+    TString DebugString() const {
+        return TStringBuilder() << "sort_heap=" << SortHeap.DebugJson();
+    }
+
+    void PutControlPoint(std::shared_ptr point);
+
+    void RemoveControlPoint();
+
+    bool ControlPointEnriched() const { // true when the heap top is a control point
+        return SortHeap.Size() && SortHeap.Current().IsControlPoint();
+    }
+
+    template
+    void AddSource(const std::shared_ptr& batch, const std::shared_ptr& filter) { // adds a batch to the merge; empty batches and total-deny filters are skipped
+        if (!batch || !batch->num_rows()) {
+            return;
+        }
+        if (filter && filter->IsTotalDenyFilter()) {
+            return;
+        }
+//        Y_DEBUG_ABORT_UNLESS(NArrow::IsSorted(batch, SortSchema));
+        auto filterImpl = (!filter || filter->IsTotalAllowFilter()) ? nullptr : filter; // a total-allow filter is equivalent to no filter
+        SortHeap.Push(TBatchIterator(batch, filterImpl, SortSchema->field_names(), DataSchema ? DataSchema->field_names() : std::vector(), Reverse, VersionColumnNames));
+    }
+
+    bool IsEmpty() const {
+        return !SortHeap.Size();
+    }
+
+    void DrainAll(TRecordBatchBuilder& builder);
+    std::shared_ptr SingleSourceDrain(const TSortableBatchPosition& readTo, const bool includeFinish, std::optional* lastResultPosition = nullptr);
+    bool DrainCurrentTo(TRecordBatchBuilder& builder, const TSortableBatchPosition& readTo, const bool includeFinish, std::optional* lastResultPosition = nullptr);
+    bool DrainToControlPoint(TRecordBatchBuilder& builder, const bool includeFinish, std::optional* lastResultPosition = nullptr);
+    std::vector> DrainAllParts(const std::map& positions,
+        const std::vector>& resultFields);
+};
+
+}
diff --git a/ydb/core/formats/arrow/reader/read_filter_merger.cpp b/ydb/core/formats/arrow/reader/position.cpp similarity index 75% rename from ydb/core/formats/arrow/reader/read_filter_merger.cpp rename to ydb/core/formats/arrow/reader/position.cpp index 726767dbecc0..a11863b32b91 100644 --- a/ydb/core/formats/arrow/reader/read_filter_merger.cpp +++ b/ydb/core/formats/arrow/reader/position.cpp @@ -1,7 +1,7 @@ -#include "read_filter_merger.h" -#include +#include "position.h" +#include -namespace NKikimr::NOlap::NIndexedReader { +namespace NKikimr::NArrow::NMerger { NJson::TJsonValue
TSortableBatchPosition::DebugJson() const { NJson::TJsonValue result; @@ -86,12 +86,35 @@ TSortableBatchPosition::TFoundPosition TSortableBatchPosition::SkipToLower(const return *pos; } +TSortableScanData::TSortableScanData(const std::shared_ptr& batch, const std::vector& columns) { + for (auto&& i : columns) { + auto c = batch->GetAccessorByNameOptional(i); + AFL_VERIFY(c)("column_name", i)("columns", JoinSeq(",", columns))("batch", batch->DebugString()); + Columns.emplace_back(NAccessor::IChunkedArray::TReader(c)); + auto f = batch->GetSchema()->GetFieldByName(i); + AFL_VERIFY(f); + Fields.emplace_back(f); + } +} + TSortableScanData::TSortableScanData(const std::shared_ptr& batch, const std::vector& columns) { for (auto&& i : columns) { auto c = batch->GetColumnByName(i); AFL_VERIFY(c)("column_name", i)("columns", JoinSeq(",", columns)); - Columns.emplace_back(c); + Columns.emplace_back(NAccessor::IChunkedArray::TReader(std::make_shared(c))); + auto f = batch->schema()->GetFieldByName(i); + AFL_VERIFY(f); + Fields.emplace_back(f); + } +} + +TSortableScanData::TSortableScanData(const std::shared_ptr& batch, const std::vector& columns) { + for (auto&& i : columns) { + auto c = batch->GetColumnByName(i); + AFL_VERIFY(c)("column_name", i)("columns", JoinSeq(",", columns)); + Columns.emplace_back(NAccessor::IChunkedArray::TReader(std::make_shared(c))); auto f = batch->schema()->GetFieldByName(i); + AFL_VERIFY(f); Fields.emplace_back(f); } } diff --git a/ydb/core/formats/arrow/reader/read_filter_merger.h b/ydb/core/formats/arrow/reader/position.h similarity index 83% rename from ydb/core/formats/arrow/reader/read_filter_merger.h rename to ydb/core/formats/arrow/reader/position.h index 9ed0befd0442..f2a070d7b1f2 100644 --- a/ydb/core/formats/arrow/reader/read_filter_merger.h +++ b/ydb/core/formats/arrow/reader/position.h @@ -1,44 +1,49 @@ #pragma once -#include -#include -#include -#include +#include #include -#include +#include +#include +#include + +#include #include 
-#include -#include -#include -namespace NKikimr::NOlap::NIndexedReader { +#include +#include +#include +#include +#include + +namespace NKikimr::NArrow::NMerger { class TRecordBatchBuilder; class TSortableScanData { private: - YDB_READONLY_DEF(std::vector>, Columns); + YDB_READONLY_DEF(std::vector, Columns); YDB_READONLY_DEF(std::vector>, Fields); public: TSortableScanData() = default; TSortableScanData(const std::shared_ptr& batch, const std::vector& columns); + TSortableScanData(const std::shared_ptr& batch, const std::vector& columns); + TSortableScanData(const std::shared_ptr& batch, const std::vector& columns); std::shared_ptr ExtractPosition(const ui64 pos) const { std::vector> columns; std::shared_ptr schema = std::make_shared(Fields); for (ui32 i = 0; i < Columns.size(); ++i) { - auto extracted = NArrow::CopyRecords(Columns[i], {pos}); + auto extracted = Columns[i].CopyRecord(pos); columns.emplace_back(extracted); } return arrow::RecordBatch::Make(schema, 1, columns); } - std::shared_ptr Slice(const ui64 offset, const ui64 count) const { - std::vector> slicedArrays; + std::shared_ptr Slice(const ui64 offset, const ui64 count) const { + std::vector> slicedArrays; for (auto&& i : Columns) { - AFL_VERIFY(offset + count <= (ui64)i->length())("offset", offset)("count", count)("length", i->length()); - slicedArrays.emplace_back(i->Slice(offset, count)); + slicedArrays.emplace_back(i.Slice(offset, count)); } - return arrow::RecordBatch::Make(std::make_shared(Fields), count, slicedArrays); + return arrow::Table::Make(std::make_shared(Fields), slicedArrays, count); } bool IsSameSchema(const std::shared_ptr& schema) const { @@ -65,8 +70,8 @@ class TSortableScanData { for (ui32 i = 0; i < Columns.size(); ++i) { auto& jsonColumn = result["sorting_columns"].AppendValue(NJson::JSON_MAP); jsonColumn["name"] = Fields[i]->name(); - if (position >= 0 && position < Columns[i]->length()) { - jsonColumn["value"] = NArrow::DebugString(Columns[i], position); + if (position >= 0 
&& (ui64)position < Columns[i].GetRecordsCount()) { + jsonColumn["value"] = Columns[i].DebugString(position); } } return result; @@ -84,7 +89,7 @@ class TSortableScanData { class TSortableBatchPosition { private: YDB_READONLY(i64, Position, 0); - i64 RecordsCount = 0; + YDB_READONLY(i64, RecordsCount, 0); bool ReverseSort = false; std::shared_ptr Sorting; std::shared_ptr Data; @@ -95,12 +100,12 @@ class TSortableBatchPosition { return Sorting->ExtractPosition(Position); } - std::shared_ptr SliceData(const ui64 offset, const ui64 count) const { + std::shared_ptr SliceData(const ui64 offset, const ui64 count) const { AFL_VERIFY(Data); return Data->Slice(offset, count); } - std::shared_ptr SliceKeys(const ui64 offset, const ui64 count) const { + std::shared_ptr SliceKeys(const ui64 offset, const ui64 count) const { AFL_VERIFY(Sorting); return Sorting->Slice(offset, count); } @@ -140,7 +145,9 @@ class TSortableBatchPosition { // (-inf, it1), [it1, it2), [it2, it3), ..., [itLast, +inf) template - static std::vector> SplitByBorders(const std::shared_ptr& batch, const std::vector& columnNames, TBordersIterator& it) { + static std::vector> SplitByBorders(const std::shared_ptr& batch, + const std::vector& columnNames, TBordersIterator& it) + { std::vector> result; if (!batch || batch->num_rows() == 0) { while (it.IsValid()) { @@ -245,7 +252,7 @@ class TSortableBatchPosition { static std::optional FindPosition(const std::shared_ptr& batch, const TSortableBatchPosition& forFound, const bool needGreater, const std::optional includedStartPosition); static std::optional FindPosition(TSortableBatchPosition& position, const ui64 posStart, const ui64 posFinish, const TSortableBatchPosition& forFound, const bool greater); - TSortableBatchPosition::TFoundPosition SkipToLower(const TSortableBatchPosition & forFound); + TSortableBatchPosition::TFoundPosition SkipToLower(const TSortableBatchPosition& forFound); const TSortableScanData& GetData() const { return *Data; @@ -268,13 +275,16 
@@ class TSortableBatchPosition { return Sorting->IsSameSchema(schema); } - TSortableBatchPosition(const std::shared_ptr& batch, const ui32 position, const std::vector& sortingColumns, const std::vector& dataColumns, const bool reverseSort) + template + TSortableBatchPosition(const std::shared_ptr& batch, const ui32 position, const std::vector& sortingColumns, + const std::vector& dataColumns, const bool reverseSort) : Position(position) - , ReverseSort(reverseSort) + , ReverseSort(reverseSort) { Y_ABORT_UNLESS(batch); Y_ABORT_UNLESS(batch->num_rows()); RecordsCount = batch->num_rows(); + AFL_VERIFY(Position < RecordsCount)("position", Position)("count", RecordsCount); if (dataColumns.size()) { Data = std::make_shared(batch, dataColumns); @@ -287,7 +297,7 @@ class TSortableBatchPosition { std::partial_ordering Compare(const TSortableBatchPosition& item) const { Y_ABORT_UNLESS(item.ReverseSort == ReverseSort); Y_ABORT_UNLESS(item.Sorting->GetColumns().size() == Sorting->GetColumns().size()); - const auto directResult = NArrow::ColumnsCompare(Sorting->GetColumns(), Position, item.Sorting->GetColumns(), item.Position); + const auto directResult = NAccessor::IChunkedArray::TReader::CompareColumns(Sorting->GetColumns(), Position, item.Sorting->GetColumns(), item.Position); if (ReverseSort) { if (directResult == std::partial_ordering::less) { return std::partial_ordering::greater; @@ -324,9 +334,8 @@ class TSortableBatchPosition { } else { return false; } - - } + } }; } diff --git a/ydb/core/formats/arrow/reader/result_builder.cpp b/ydb/core/formats/arrow/reader/result_builder.cpp new file mode 100644 index 000000000000..5a7d79a08fed --- /dev/null +++ b/ydb/core/formats/arrow/reader/result_builder.cpp @@ -0,0 +1,71 @@ +#include "result_builder.h" + +#include + +#include +#include + +#include + +namespace NKikimr::NArrow::NMerger { + +void TRecordBatchBuilder::ValidateDataSchema(const std::shared_ptr& schema) { + AFL_VERIFY(IsSameFieldsSequence(schema->fields(), Fields)); 
+}
+
+void TRecordBatchBuilder::AddRecord(const TSortableBatchPosition& position) { // appends the record under `position` to every column builder
+    AFL_VERIFY_DEBUG(position.GetData().GetColumns().size() == Builders.size());
+    AFL_VERIFY_DEBUG(IsSameFieldsSequence(position.GetData().GetFields(), Fields));
+//    AFL_TRACE(NKikimrServices::TX_COLUMNSHARD)("event", "record_add_on_read")("record", position.DebugJson());
+    for (ui32 i = 0; i < position.GetData().GetColumns().size(); ++i) {
+        position.GetData().GetColumns()[i].AppendPositionTo(*Builders[i], position.GetPosition(), MemoryBufferLimit ? &CurrentBytesUsed : nullptr); // the byte counter is passed only when a limit is set
+    }
+    ++RecordsCount;
+}
+
+bool TRecordBatchBuilder::IsSameFieldsSequence(const std::vector>& f1, const std::vector>& f2) { // true when both lists have the same length and pairwise Equals() fields
+    if (f1.size() != f2.size()) {
+        return false;
+    }
+    for (ui32 i = 0; i < f1.size(); ++i) {
+        if (!f1[i]->Equals(f2[i])) {
+            return false;
+        }
+    }
+    return true;
+}
+
+TRecordBatchBuilder::TRecordBatchBuilder(const std::vector>& fields, const std::optional rowsCountExpectation /*= {}*/, const THashMap& fieldDataSizePreallocated /*= {}*/)
+    : Fields(fields)
+{
+    AFL_VERIFY(Fields.size()); // at least one field is required; Finalize() reads the first column
+    for (auto&& f : fields) {
+        Builders.emplace_back(NArrow::MakeBuilder(f));
+        auto it = fieldDataSizePreallocated.find(f->name());
+        if (it != fieldDataSizePreallocated.end()) {
+            NArrow::ReserveData(*Builders.back(), it->second); // pre-reserve using the size given for this field name
+        }
+        if (rowsCountExpectation) {
+            NArrow::TStatusValidator::Validate(Builders.back()->Reserve(*rowsCountExpectation)); // pre-reserve row slots when a row count is expected
+        }
+    }
+}
+
+std::shared_ptr TRecordBatchBuilder::Finalize() { // finishes every builder and assembles the batch; the row count comes from the first column
+    auto schema = std::make_shared(Fields);
+    std::vector> columns;
+    for (auto&& i : Builders) {
+        columns.emplace_back(NArrow::TStatusValidator::GetValid(i->Finish()));
+    }
+    return arrow::RecordBatch::Make(schema, columns.front()->length(), columns);
+}
+
+TString TRecordBatchBuilder::GetColumnNames() const { // field names, each followed by a comma (including the last one)
+    TStringBuilder result;
+    for (auto&& f : Fields) {
+        result << f->name() << ",";
+    }
+    return result;
+}
+
+}
diff --git a/ydb/core/formats/arrow/reader/result_builder.h
b/ydb/core/formats/arrow/reader/result_builder.h
new file mode 100644
index 000000000000..ba05e03cb934
--- /dev/null
+++ b/ydb/core/formats/arrow/reader/result_builder.h
@@ -0,0 +1,38 @@
+#pragma once
+#include "position.h"
+#include
+#include
+#include
+#include
+
+namespace NKikimr::NArrow::NMerger {
+
+class TRecordBatchBuilder { // accumulates records column by column and produces a record batch in Finalize()
+private:
+    std::vector> Builders; // one builder per entry of Fields, in the same order
+    YDB_READONLY_DEF(std::vector>, Fields);
+    YDB_READONLY(ui32, RecordsCount, 0); // incremented by AddRecord()
+    YDB_ACCESSOR_DEF(std::optional, MemoryBufferLimit); // optional cap checked by IsBufferExhausted()
+
+    ui64 CurrentBytesUsed = 0; // handed to the column appenders by AddRecord() only when MemoryBufferLimit is set
+    bool IsSameFieldsSequence(const std::vector>& f1, const std::vector>& f2);
+
+public:
+    ui32 GetBuildersCount() const {
+        return Builders.size();
+    }
+
+    TString GetColumnNames() const;
+
+    TRecordBatchBuilder(const std::vector>& fields, const std::optional rowsCountExpectation = {}, const THashMap& fieldDataSizePreallocated = {});
+
+    std::shared_ptr Finalize();
+
+    bool IsBufferExhausted() const { // true once the used bytes strictly exceed the limit; always false when no limit is set
+        return MemoryBufferLimit && *MemoryBufferLimit < CurrentBytesUsed;
+    }
+    void AddRecord(const TSortableBatchPosition& position);
+    void ValidateDataSchema(const std::shared_ptr& schema);
+};
+
+}
diff --git a/ydb/core/formats/arrow/reader/ya.make b/ydb/core/formats/arrow/reader/ya.make index 16e5877a6ee4..d57bb4e501ca 100644 --- a/ydb/core/formats/arrow/reader/ya.make +++ b/ydb/core/formats/arrow/reader/ya.make @@ -4,11 +4,17 @@ PEERDIR( contrib/libs/apache/arrow ydb/core/formats/arrow/simple_builder ydb/core/formats/arrow/switch + ydb/core/formats/arrow/common ydb/library/actors/core + ydb/library/services ) SRCS( - read_filter_merger.cpp + batch_iterator.cpp + merger.cpp + position.cpp + heap.cpp + result_builder.cpp ) END() diff --git a/ydb/core/formats/arrow/replace_key.h b/ydb/core/formats/arrow/replace_key.h index f92a677050fb..f3b710fc7392 100644 --- a/ydb/core/formats/arrow/replace_key.h +++ b/ydb/core/formats/arrow/replace_key.h @@ -1,7 +1,10 @@ #pragma once #include "arrow_helpers.h" #include "permutations.h" + #include
"common/validation.h" +#include "switch/compare.h" + #include #include @@ -121,14 +124,14 @@ class TReplaceKeyTemplate { std::partial_ordering CompareColumnValueNotNull(int column, const TReplaceKeyTemplate& key, int keyColumn) const { Y_DEBUG_ABORT_UNLESS(Column(column).type_id() == key.Column(keyColumn).type_id()); - return TypedCompare(Column(column), Position, key.Column(keyColumn), key.Position); + return TComparator::TypedCompare(Column(column), Position, key.Column(keyColumn), key.Position); } template std::partial_ordering CompareColumnValue(int column, const TReplaceKeyTemplate& key, int keyColumn) const { Y_DEBUG_ABORT_UNLESS(Column(column).type_id() == key.Column(keyColumn).type_id()); - return TypedCompare(Column(column), Position, key.Column(keyColumn), key.Position); + return TComparator::TypedCompare(Column(column), Position, key.Column(keyColumn), key.Position); } int Size() const { @@ -219,111 +222,6 @@ class TReplaceKeyTemplate { TArrayVecPtr Columns = nullptr; ui64 Position = 0; - template - static std::partial_ordering TypedCompare(const arrow::Array& lhs, int lpos, const arrow::Array& rhs, int rpos) { - arrow::Type::type typeId = lhs.type_id(); - switch (typeId) { - case arrow::Type::NA: - case arrow::Type::BOOL: - break; - case arrow::Type::UINT8: - return CompareView(lhs, lpos, rhs, rpos); - case arrow::Type::INT8: - return CompareView(lhs, lpos, rhs, rpos); - case arrow::Type::UINT16: - return CompareView(lhs, lpos, rhs, rpos); - case arrow::Type::INT16: - return CompareView(lhs, lpos, rhs, rpos); - case arrow::Type::UINT32: - return CompareView(lhs, lpos, rhs, rpos); - case arrow::Type::INT32: - return CompareView(lhs, lpos, rhs, rpos); - case arrow::Type::UINT64: - return CompareView(lhs, lpos, rhs, rpos); - case arrow::Type::INT64: - return CompareView(lhs, lpos, rhs, rpos); - case arrow::Type::HALF_FLOAT: - break; - case arrow::Type::FLOAT: - return CompareView(lhs, lpos, rhs, rpos); - case arrow::Type::DOUBLE: - return CompareView(lhs, 
lpos, rhs, rpos); - case arrow::Type::STRING: - return CompareView(lhs, lpos, rhs, rpos); - case arrow::Type::BINARY: - return CompareView(lhs, lpos, rhs, rpos); - case arrow::Type::FIXED_SIZE_BINARY: - case arrow::Type::DATE32: - case arrow::Type::DATE64: - break; - case arrow::Type::TIMESTAMP: - return CompareView(lhs, lpos, rhs, rpos); - case arrow::Type::TIME32: - return CompareView(lhs, lpos, rhs, rpos); - case arrow::Type::TIME64: - return CompareView(lhs, lpos, rhs, rpos); - case arrow::Type::DURATION: - return CompareView(lhs, lpos, rhs, rpos); - case arrow::Type::DECIMAL256: - case arrow::Type::DECIMAL: - case arrow::Type::DENSE_UNION: - case arrow::Type::DICTIONARY: - case arrow::Type::EXTENSION: - case arrow::Type::FIXED_SIZE_LIST: - case arrow::Type::INTERVAL_DAY_TIME: - case arrow::Type::INTERVAL_MONTHS: - case arrow::Type::LARGE_BINARY: - case arrow::Type::LARGE_LIST: - case arrow::Type::LARGE_STRING: - case arrow::Type::LIST: - case arrow::Type::MAP: - case arrow::Type::MAX_ID: - case arrow::Type::SPARSE_UNION: - case arrow::Type::STRUCT: - Y_ABORT("not implemented"); - break; - } - return std::partial_ordering::equivalent; - } - - template - static std::partial_ordering CompareView(const arrow::Array& lhs, int lpos, const arrow::Array& rhs, int rpos) { - auto& left = static_cast(lhs); - auto& right = static_cast(rhs); - if constexpr (notNull) { - return CompareValueNotNull(left.GetView(lpos), right.GetView(rpos)); - } else { - return CompareValue(left.GetView(lpos), right.GetView(rpos), left.IsNull(lpos), right.IsNull(rpos)); - } - } - - template - static std::partial_ordering CompareValue(const T& x, const T& y, bool xIsNull, bool yIsNull) { - // TODO: std::partial_ordering::unordered for both nulls? 
- if (xIsNull) { - return std::partial_ordering::less; - } - if (yIsNull) { - return std::partial_ordering::greater; - } - return CompareValueNotNull(x, y); - } - - template - static std::partial_ordering CompareValueNotNull(const T& x, const T& y) { - if constexpr (std::is_same_v) { - size_t minSize = (x.size() < y.size()) ? x.size() : y.size(); - int cmp = memcmp(x.data(), y.data(), minSize); - if (cmp < 0) { - return std::partial_ordering::less; - } else if (cmp > 0) { - return std::partial_ordering::greater; - } - return CompareValueNotNull(x.size(), y.size()); - } else { - return x <=> y; - } - } }; using TReplaceKey = TReplaceKeyTemplate>; diff --git a/ydb/core/formats/arrow/serializer/abstract.h b/ydb/core/formats/arrow/serializer/abstract.h index 6051f9fc90ce..1c8d9963dd7e 100644 --- a/ydb/core/formats/arrow/serializer/abstract.h +++ b/ydb/core/formats/arrow/serializer/abstract.h @@ -1,9 +1,12 @@ #pragma once +#include #include #include #include -#include +#include + +#include #include #include @@ -32,6 +35,9 @@ class ISerializer { using TProto = NKikimrSchemeOp::TOlapColumn::TSerializer; virtual ~ISerializer() = default; + virtual bool IsCompatibleForExchangeWithSameClass(const ISerializer& item) const = 0; + virtual bool IsEqualToSameClass(const ISerializer& item) const = 0; + TConclusionStatus DeserializeFromRequest(NYql::TFeaturesExtractor& features) { return DoDeserializeFromRequest(features); } @@ -49,13 +55,30 @@ class ISerializer { } TString SerializeFull(const std::shared_ptr& batch) const { + if (!batch) { + return ""; + } return DoSerializeFull(batch); } TString SerializePayload(const std::shared_ptr& batch) const { + if (!batch) { + return ""; + } return DoSerializePayload(batch); } + std::shared_ptr Repack(const std::shared_ptr& batch) { + if (!batch) { + return batch; + } + return TStatusValidator::GetValid(Deserialize(SerializeFull(batch))); + } + + TString Repack(const TString& batchString) { + return 
SerializeFull(TStatusValidator::GetValid(Deserialize(batchString))); + } + arrow::Result> Deserialize(const TString& data) const { if (!data) { return nullptr; @@ -87,6 +110,32 @@ class TSerializerContainer: public NBackgroundTasks::TInterfaceProtoContainerGetClassName() != item.GetObjectPtr()->GetClassName()) { + return false; + } + return GetObjectPtr()->IsCompatibleForExchangeWithSameClass(*item.GetObjectPtr()); + } + + bool IsEqualTo(const TSerializerContainer& item) const { + if (!GetObjectPtr() && !!item.GetObjectPtr()) { + return false; + } + if (!!GetObjectPtr() && !item.GetObjectPtr()) { + return false; + } + if (GetObjectPtr()->GetClassName() != item.GetObjectPtr()->GetClassName()) { + return false; + } + return GetObjectPtr()->IsEqualToSameClass(*item.GetObjectPtr()); + } + TString DebugString() const { if (GetObjectPtr()) { return GetObjectPtr()->DebugString(); @@ -101,6 +150,23 @@ class TSerializerContainer: public NBackgroundTasks::TInterfaceProtoContainer BuildFromProto(const NKikimrSchemeOp::TOlapColumn::TSerializer& proto) { + TSerializerContainer result; + if (!result.DeserializeFromProto(proto)) { + return TConclusionStatus::Fail("cannot parse proto for serializer construction: " + proto.DebugString()); + } + return result; + } + + static TConclusion BuildFromProto(const NKikimrSchemeOp::TCompressionOptions& proto) { + TSerializerContainer result; + auto parsed = result.DeserializeFromProto(proto); + if (!parsed) { + return parsed; + } + return result; + } }; } diff --git a/ydb/core/formats/arrow/serializer/native.h b/ydb/core/formats/arrow/serializer/native.h index ece3e9e34fc9..260b1f73c324 100644 --- a/ydb/core/formats/arrow/serializer/native.h +++ b/ydb/core/formats/arrow/serializer/native.h @@ -23,6 +23,26 @@ class TNativeSerializer: public ISerializer { TConclusion> BuildCodec(const arrow::Compression::type& cType, const std::optional level) const; static const inline TFactory::TRegistrator Registrator = 
TFactory::TRegistrator(GetClassNameStatic()); protected: + virtual bool IsCompatibleForExchangeWithSameClass(const ISerializer& /*item*/) const override { + return true; + } + + virtual bool IsEqualToSameClass(const ISerializer& item) const override { + auto& itemOptions = static_cast(item).Options; + if (!!itemOptions.codec != !!Options.codec) { + return false; + } + if (!itemOptions.codec) { + return true; + } + if (itemOptions.codec->name() != Options.codec->name()) { + return false; + } + if (itemOptions.codec->compression_level() != Options.codec->compression_level()) { + return false; + } + return true; + } virtual TString DoSerializeFull(const std::shared_ptr& batch) const override; virtual TString DoSerializePayload(const std::shared_ptr& batch) const override; virtual arrow::Result> DoDeserialize(const TString& data) const override; diff --git a/ydb/core/formats/arrow/size_calcer.cpp b/ydb/core/formats/arrow/size_calcer.cpp index 838511202514..bff5f06aed40 100644 --- a/ydb/core/formats/arrow/size_calcer.cpp +++ b/ydb/core/formats/arrow/size_calcer.cpp @@ -147,6 +147,32 @@ ui64 GetBatchMemorySize(const std::shared_ptr& batch) { return bytes; } +ui64 GetTableMemorySize(const std::shared_ptr& batch) { + if (!batch) { + return 0; + } + ui64 bytes = 0; + for (auto& column : batch->columns()) { + for (auto&& chunk : column->chunks()) { + bytes += GetArrayMemorySize(chunk->data()); + } + } + return bytes; +} + +ui64 GetTableDataSize(const std::shared_ptr& batch) { + if (!batch) { + return 0; + } + ui64 bytes = 0; + for (auto& column : batch->columns()) { + for (auto&& chunk : column->chunks()) { + bytes += GetArrayDataSize(chunk); + } + } + return bytes; +} + template ui64 GetArrayDataSizeImpl(const std::shared_ptr& column) { return sizeof(typename TType::c_type) * column->length(); diff --git a/ydb/core/formats/arrow/size_calcer.h b/ydb/core/formats/arrow/size_calcer.h index 2bf5120ed280..83ee4da3f0f0 100644 --- a/ydb/core/formats/arrow/size_calcer.h +++ 
b/ydb/core/formats/arrow/size_calcer.h @@ -130,9 +130,11 @@ TSplitBlobResult SplitByBlobSize(const std::shared_ptr& batc // Return size in bytes including size of bitmap mask ui64 GetBatchDataSize(const std::shared_ptr& batch); +ui64 GetTableDataSize(const std::shared_ptr& batch); // Return size in bytes including size of bitmap mask ui64 GetArrayMemorySize(const std::shared_ptr& data); ui64 GetBatchMemorySize(const std::shared_ptr&batch); +ui64 GetTableMemorySize(const std::shared_ptr& batch); // Return size in bytes *not* including size of bitmap mask ui64 GetArrayDataSize(const std::shared_ptr& column); diff --git a/ydb/core/formats/arrow/sort_cursor.h b/ydb/core/formats/arrow/sort_cursor.h deleted file mode 100644 index f51f4145ae60..000000000000 --- a/ydb/core/formats/arrow/sort_cursor.h +++ /dev/null @@ -1,262 +0,0 @@ -// The code in this file is based on original ClickHouse source code -// which is licensed under Apache license v2.0 -// See: https://github.com/ClickHouse/ClickHouse/ - -#pragma once -#include "replace_key.h" -#include "arrow_helpers.h" -#include -#include - -namespace NKikimr::NArrow { - -/// Description of the sorting rule for several columns. -struct TSortDescription { - /// @note In case you have PK and snapshot column you should sort with {ASC PK, DESC snap} key and replase with PK - std::shared_ptr SortingKey; - std::shared_ptr ReplaceKey; /// Keep first visited (SortingKey ordered) of dups - std::vector Directions; /// 1 - ascending, -1 - descending. - bool NotNull{false}; - bool Reverse{false}; // Read sources from bottom to top. 
With inversed Directions leads to DESC dst for ASC src - - TSortDescription() = default; - - TSortDescription(const std::shared_ptr& sortingKey, - const std::shared_ptr& replaceKey = {}) - : SortingKey(sortingKey) - , ReplaceKey(replaceKey) - , Directions(sortingKey->num_fields(), 1) - {} - - size_t Size() const { return SortingKey->num_fields(); } - int Direction(size_t pos) const { return Directions[pos]; } - bool Replace() const { return ReplaceKey.get(); } - - void Inverse() { - Reverse = !Reverse; - for (int& dir : Directions) { - dir *= -1; - } - } -}; - - -/// Cursor allows to compare rows in different batches. -/// Cursor moves inside single block. It is used in priority queue. -struct TSortCursorImpl { - std::shared_ptr sort_columns; - std::shared_ptr desc; - ui32 order = 0; // Number of cursor. It determines an order if comparing columns are equal. - // - std::shared_ptr current_batch; - const TArrayVec* all_columns; - std::shared_ptr replace_columns; - - TSortCursorImpl() = default; - - TSortCursorImpl(std::shared_ptr batch, std::shared_ptr desc_, ui32 order_ = 0) - : desc(desc_) - , order(order_) - { - Reset(batch); - } - - bool Empty() const { return Rows() == 0; } - size_t Rows() const { return (!all_columns || all_columns->empty()) ? 0 : all_columns->front()->length(); } - size_t LastRow() const { return Rows() - 1; } - - void Reset(std::shared_ptr batch) { - current_batch = batch; - auto rbSorting = ExtractColumns(batch, desc->SortingKey); - Y_ABORT_UNLESS(rbSorting); - sort_columns = std::make_shared(rbSorting->columns()); - all_columns = &batch->columns(); - if (desc->ReplaceKey) { - auto rbReplace = ExtractColumns(batch, desc->ReplaceKey); - Y_ABORT_UNLESS(rbReplace); - replace_columns = std::make_shared(rbReplace->columns()); - } - pos = 0; - } - - size_t getRow() const { - return desc->Reverse ? 
(Rows() - pos - 1) : pos; - } - - bool isFirst() const { return pos == 0; } - bool isLast() const { return pos + 1 >= Rows(); } - bool isValid() const { return pos < Rows(); } - void next() { ++pos; } - -private: - size_t pos{0}; -}; - - -struct TSortCursor { - TSortCursorImpl* Impl; - bool NotNull; - - TSortCursor(TSortCursorImpl* impl, bool notNull) - : Impl(impl) - , NotNull(notNull) - {} - - TSortCursorImpl* operator-> () { return Impl; } - const TSortCursorImpl* operator-> () const { return Impl; } - - bool Greater(const TSortCursor& rhs) const { - return GreaterAt(rhs, Impl->getRow(), rhs.Impl->getRow()); - } - - /// Inverted so that the priority queue elements are removed in ascending order. - bool operator < (const TSortCursor& rhs) const { - return Greater(rhs); - } - -private: - /// The specified row of this cursor is greater than the specified row of another cursor. - bool GreaterAt(const TSortCursor& rhs, size_t lhs_pos, size_t rhs_pos) const { - TRawReplaceKey left(Impl->sort_columns.get(), lhs_pos); - TRawReplaceKey right(rhs.Impl->sort_columns.get(), rhs_pos); - - if (NotNull) { - for (size_t i = 0; i < Impl->desc->Size(); ++i) { - auto cmp = left.CompareColumnValueNotNull(i, right, i); - int res = Impl->desc->Direction(i) * (std::is_eq(cmp) ? 0 : (std::is_lt(cmp) ? -1 : 1)); - if (res > 0) - return true; - if (res < 0) - return false; - } - } else { - for (size_t i = 0; i < Impl->desc->Size(); ++i) { - auto cmp = left.CompareColumnValue(i, right, i); - int res = Impl->desc->Direction(i) * (std::is_eq(cmp) ? 0 : (std::is_lt(cmp) ? -1 : 1)); - if (res > 0) - return true; - if (res < 0) - return false; - } - } - return Impl->order > rhs.Impl->order; - } -}; - - -/// Allows to fetch data from multiple sort cursors in sorted order (merging sorted data stream). 
-/// TODO: Replace with "Loser Tree", see https://en.wikipedia.org/wiki/K-way_merge_algorithm -class TSortingHeap { -public: - TSortingHeap() = default; - - template - TSortingHeap(TCursors& cursors, bool notNull) { - Queue.reserve(cursors.size()); - for (auto& cur : cursors) { - if (!cur.Empty()) { - Queue.emplace_back(TSortCursor(&cur, notNull)); - } - } - std::make_heap(Queue.begin(), Queue.end()); - } - - bool IsValid() const { return !Queue.empty(); } - TSortCursor& Current() { return Queue.front(); } - size_t Size() { return Queue.size(); } - TSortCursor& NextChild() { return Queue[NextChildIndex()]; } - - void Next() { - Y_ABORT_UNLESS(IsValid()); - - if (!Current()->isLast()) { - Current()->next(); - UpdateTop(); - } - else { - RemoveTop(); - } - } - - void ReplaceTop(TSortCursor&& new_top) { - Current() = new_top; - UpdateTop(); - } - - void RemoveTop() { - std::pop_heap(Queue.begin(), Queue.end()); - Queue.pop_back(); - NextIdx = 0; - } - - void Push(TSortCursor&& cursor) { - Queue.emplace_back(cursor); - std::push_heap(Queue.begin(), Queue.end()); - NextIdx = 0; - } - -private: - std::vector Queue; - /// Cache comparison between first and second child if the order in queue has not been changed. - size_t NextIdx = 0; - - size_t NextChildIndex() { - if (NextIdx == 0) { - NextIdx = 1; - if (Queue.size() > 2 && Queue[1] < Queue[2]) { - ++NextIdx; - } - } - - return NextIdx; - } - - /// This is adapted version of the function __sift_down from libc++. - /// Why cannot simply use std::priority_queue? - /// - because it doesn't support updating the top element and requires pop and push instead. - /// Also look at "Boost.Heap" library. - void UpdateTop() { - size_t size = Queue.size(); - if (size < 2) - return; - - auto begin = Queue.begin(); - - size_t child_idx = NextChildIndex(); - auto child_it = begin + child_idx; - - /// Check if we are in order. 
- if (*child_it < *begin) - return; - - NextIdx = 0; - - auto curr_it = begin; - auto top(std::move(*begin)); - do { - /// We are not in heap-order, swap the parent with it's largest child. - *curr_it = std::move(*child_it); - curr_it = child_it; - - // recompute the child based off of the updated parent - child_idx = 2 * child_idx + 1; - - if (child_idx >= size) - break; - - child_it = begin + child_idx; - - if ((child_idx + 1) < size && *child_it < *(child_it + 1)) - { - /// Right child exists and is greater than left child. - ++child_it; - ++child_idx; - } - - /// Check if we are in order. - } while (!(*child_it < top)); - *curr_it = std::move(top); - } -}; - -} diff --git a/ydb/core/formats/arrow/special_keys.cpp b/ydb/core/formats/arrow/special_keys.cpp index b84fa44799c6..a654c4be6ef6 100644 --- a/ydb/core/formats/arrow/special_keys.cpp +++ b/ydb/core/formats/arrow/special_keys.cpp @@ -1,6 +1,9 @@ #include "special_keys.h" #include "permutations.h" -#include "reader/read_filter_merger.h" +#include "size_calcer.h" + +#include "reader/position.h" + #include #include #include @@ -32,6 +35,14 @@ TString TSpecialKeys::SerializeToStringDataOnlyNoCompression() const { return NArrow::SerializeBatchNoCompression(Data); } +ui64 TSpecialKeys::GetMemoryBytes() const { + return Data ? 
NArrow::GetBatchDataSize(Data) : 0; +} + +ui64 TSpecialKeys::GetMemorySize() const { + return GetBatchMemorySize(Data); +} + TFirstLastSpecialKeys::TFirstLastSpecialKeys(const std::shared_ptr& batch, const std::vector& columnNames /*= {}*/) { Y_ABORT_UNLESS(batch); Y_ABORT_UNLESS(batch->num_rows()); @@ -53,9 +64,9 @@ TMinMaxSpecialKeys::TMinMaxSpecialKeys(std::shared_ptr batch Y_ABORT_UNLESS(batch->num_rows()); Y_ABORT_UNLESS(schema); - NOlap::NIndexedReader::TSortableBatchPosition record(batch, 0, schema->field_names(), {}, false); - std::optional minValue; - std::optional maxValue; + NMerger::TSortableBatchPosition record(batch, 0, schema->field_names(), {}, false); + std::optional minValue; + std::optional maxValue; while (true) { if (!minValue || minValue->Compare(record) == std::partial_ordering::greater) { minValue = record; diff --git a/ydb/core/formats/arrow/special_keys.h b/ydb/core/formats/arrow/special_keys.h index f157ce089fe4..d56e658fbb68 100644 --- a/ydb/core/formats/arrow/special_keys.h +++ b/ydb/core/formats/arrow/special_keys.h @@ -20,6 +20,8 @@ class TSpecialKeys { } public: + ui64 GetMemoryBytes() const; + TString SerializeToStringDataOnlyNoCompression() const; TSpecialKeys(const TString& data, const std::shared_ptr& schema) { @@ -33,6 +35,7 @@ class TSpecialKeys { } TString SerializeToString() const; + ui64 GetMemorySize() const; }; class TFirstLastSpecialKeys: public TSpecialKeys { diff --git a/ydb/core/formats/arrow/switch/compare.cpp b/ydb/core/formats/arrow/switch/compare.cpp new file mode 100644 index 000000000000..4dacd9c5434f --- /dev/null +++ b/ydb/core/formats/arrow/switch/compare.cpp @@ -0,0 +1,5 @@ +#include "compare.h" + +namespace NKikimr::NArrow { + +} \ No newline at end of file diff --git a/ydb/core/formats/arrow/switch/compare.h b/ydb/core/formats/arrow/switch/compare.h new file mode 100644 index 000000000000..54beba09e6a8 --- /dev/null +++ b/ydb/core/formats/arrow/switch/compare.h @@ -0,0 +1,115 @@ +#pragma once +#include 
+#include + +namespace NKikimr::NArrow { + +class TComparator { +public: + template + static std::partial_ordering TypedCompare(const arrow::Array& lhs, const int lpos, const arrow::Array& rhs, const int rpos) { + arrow::Type::type typeId = lhs.type_id(); + switch (typeId) { + case arrow::Type::NA: + case arrow::Type::BOOL: + break; + case arrow::Type::UINT8: + return CompareView(lhs, lpos, rhs, rpos); + case arrow::Type::INT8: + return CompareView(lhs, lpos, rhs, rpos); + case arrow::Type::UINT16: + return CompareView(lhs, lpos, rhs, rpos); + case arrow::Type::INT16: + return CompareView(lhs, lpos, rhs, rpos); + case arrow::Type::UINT32: + return CompareView(lhs, lpos, rhs, rpos); + case arrow::Type::INT32: + return CompareView(lhs, lpos, rhs, rpos); + case arrow::Type::UINT64: + return CompareView(lhs, lpos, rhs, rpos); + case arrow::Type::INT64: + return CompareView(lhs, lpos, rhs, rpos); + case arrow::Type::HALF_FLOAT: + break; + case arrow::Type::FLOAT: + return CompareView(lhs, lpos, rhs, rpos); + case arrow::Type::DOUBLE: + return CompareView(lhs, lpos, rhs, rpos); + case arrow::Type::STRING: + return CompareView(lhs, lpos, rhs, rpos); + case arrow::Type::BINARY: + return CompareView(lhs, lpos, rhs, rpos); + case arrow::Type::FIXED_SIZE_BINARY: + case arrow::Type::DATE32: + case arrow::Type::DATE64: + break; + case arrow::Type::TIMESTAMP: + return CompareView(lhs, lpos, rhs, rpos); + case arrow::Type::TIME32: + return CompareView(lhs, lpos, rhs, rpos); + case arrow::Type::TIME64: + return CompareView(lhs, lpos, rhs, rpos); + case arrow::Type::DURATION: + return CompareView(lhs, lpos, rhs, rpos); + case arrow::Type::DECIMAL256: + case arrow::Type::DECIMAL: + case arrow::Type::DENSE_UNION: + case arrow::Type::DICTIONARY: + case arrow::Type::EXTENSION: + case arrow::Type::FIXED_SIZE_LIST: + case arrow::Type::INTERVAL_DAY_TIME: + case arrow::Type::INTERVAL_MONTHS: + case arrow::Type::LARGE_BINARY: + case arrow::Type::LARGE_LIST: + case arrow::Type::LARGE_STRING: 
+ case arrow::Type::LIST: + case arrow::Type::MAP: + case arrow::Type::MAX_ID: + case arrow::Type::SPARSE_UNION: + case arrow::Type::STRUCT: + Y_ABORT("not implemented"); + break; + } + return std::partial_ordering::equivalent; + } + + template + static std::partial_ordering CompareView(const arrow::Array& lhs, int lpos, const arrow::Array& rhs, int rpos) { + auto& left = static_cast(lhs); + auto& right = static_cast(rhs); + if constexpr (notNull) { + return CompareValueNotNull(left.GetView(lpos), right.GetView(rpos)); + } else { + return CompareValue(left.GetView(lpos), right.GetView(rpos), left.IsNull(lpos), right.IsNull(rpos)); + } + } + + template + static std::partial_ordering CompareValue(const T& x, const T& y, bool xIsNull, bool yIsNull) { + // TODO: std::partial_ordering::unordered for both nulls? + if (xIsNull) { + return std::partial_ordering::less; + } + if (yIsNull) { + return std::partial_ordering::greater; + } + return CompareValueNotNull(x, y); + } + + template + static std::partial_ordering CompareValueNotNull(const T& x, const T& y) { + if constexpr (std::is_same_v) { + size_t minSize = (x.size() < y.size()) ? x.size() : y.size(); + int cmp = memcmp(x.data(), y.data(), minSize); + if (cmp < 0) { + return std::partial_ordering::less; + } else if (cmp > 0) { + return std::partial_ordering::greater; + } + return CompareValueNotNull(x.size(), y.size()); + } else { + return x <=> y; + } + } +}; +} \ No newline at end of file diff --git a/ydb/core/formats/arrow/switch/switch_type.h b/ydb/core/formats/arrow/switch/switch_type.h index a8627f4654c7..487144be7e9d 100644 --- a/ydb/core/formats/arrow/switch/switch_type.h +++ b/ydb/core/formats/arrow/switch/switch_type.h @@ -117,7 +117,7 @@ bool SwitchArrayType(const arrow::Datum& column, TFunc&& f) { * @return Result of execution of callback or false if the type typeId is not supported. 
*/ template -bool SwitchYqlTypeToArrowType(const NScheme::TTypeInfo& typeInfo, TFunc&& callback) { +[[nodiscard]] bool SwitchYqlTypeToArrowType(const NScheme::TTypeInfo& typeInfo, TFunc&& callback) { switch (typeInfo.GetTypeId()) { case NScheme::NTypeIds::Bool: return callback(TTypeWrapper()); @@ -227,7 +227,7 @@ bool Append(arrow::ArrayBuilder& builder, const std::vector& } template -bool Append(T& builder, const arrow::Array& array, int position, ui64* recordSize = nullptr) { +[[nodiscard]] bool Append(T& builder, const arrow::Array& array, int position, ui64* recordSize = nullptr) { return SwitchType(array.type_id(), [&](const auto& type) { using TWrap = std::decay_t; using TArray = typename arrow::TypeTraits::ArrayType; diff --git a/ydb/core/formats/arrow/switch/ya.make b/ydb/core/formats/arrow/switch/ya.make index b32eb15f2ed1..e11e5e070ca6 100644 --- a/ydb/core/formats/arrow/switch/ya.make +++ b/ydb/core/formats/arrow/switch/ya.make @@ -8,6 +8,7 @@ PEERDIR( SRCS( switch_type.cpp + compare.cpp ) END() diff --git a/ydb/core/formats/arrow/transformer/abstract.h b/ydb/core/formats/arrow/transformer/abstract.h index 6d99379f8b97..b4c397c2d773 100644 --- a/ydb/core/formats/arrow/transformer/abstract.h +++ b/ydb/core/formats/arrow/transformer/abstract.h @@ -10,14 +10,24 @@ class ITransformer { protected: virtual std::shared_ptr DoTransform(const std::shared_ptr& batch) const = 0; virtual TString DoDebugString() const = 0; + virtual bool IsEqualToSameClass(const ITransformer& item) const = 0; public: using TPtr = std::shared_ptr; virtual ~ITransformer() = default; + virtual TString GetClassName() const = 0; + TString DebugString() const { return DoDebugString(); } + bool IsEqualTo(const ITransformer& item) const { + if (GetClassName() != item.GetClassName()) { + return false; + } + return IsEqualToSameClass(item); + } + std::shared_ptr Transform(const std::shared_ptr& batch) const { return DoTransform(batch); } diff --git 
a/ydb/core/formats/arrow/transformer/dictionary.h b/ydb/core/formats/arrow/transformer/dictionary.h index a029b956e0fc..da0c13a5189a 100644 --- a/ydb/core/formats/arrow/transformer/dictionary.h +++ b/ydb/core/formats/arrow/transformer/dictionary.h @@ -4,21 +4,41 @@ namespace NKikimr::NArrow::NTransformation { class TDictionaryPackTransformer: public ITransformer { +public: + static TString GetClassNameStatic() { + return "DICT_PACK"; + } protected: virtual std::shared_ptr DoTransform(const std::shared_ptr& batch) const override; virtual TString DoDebugString() const override { return "type=DICT_PACK;"; } + virtual bool IsEqualToSameClass(const ITransformer& /*item*/) const override { + return true; + } public: + virtual TString GetClassName() const override { + return GetClassNameStatic(); + } }; class TDictionaryUnpackTransformer: public ITransformer { +public: + static TString GetClassNameStatic() { + return "DICT_UNPACK"; + } protected: virtual std::shared_ptr DoTransform(const std::shared_ptr& batch) const override; virtual TString DoDebugString() const override { return "type=DICT_UNPACK;"; } + virtual bool IsEqualToSameClass(const ITransformer& /*item*/) const override { + return true; + } public: + virtual TString GetClassName() const override { + return GetClassNameStatic(); + } }; } diff --git a/ydb/core/formats/arrow/ut_arrow.cpp b/ydb/core/formats/arrow/ut/ut_arrow.cpp similarity index 79% rename from ydb/core/formats/arrow/ut_arrow.cpp rename to ydb/core/formats/arrow/ut/ut_arrow.cpp index 2ab400bf4917..da620d70fa30 100644 --- a/ydb/core/formats/arrow/ut_arrow.cpp +++ b/ydb/core/formats/arrow/ut/ut_arrow.cpp @@ -1,9 +1,10 @@ -#include "arrow_batch_builder.h" -#include "arrow_helpers.h" -#include "converter.h" -#include "one_batch_input_stream.h" -#include "merging_sorted_input_stream.h" -#include "arrow_filter.h" +#include +#include +#include +#include +#include +#include +#include #include #include @@ -480,13 +481,6 @@ ui32 RestoreValue(ui32 a, ui32 b, 
ui32 c) { return ui32(a) * 100 + b * 10 + c; } -ui32 RestoreOne(const std::shared_ptr& batch, int pos) { - auto arrA = std::static_pointer_cast(batch->GetColumnByName("i8")); - auto arrB = std::static_pointer_cast(batch->GetColumnByName("i16")); - auto arrC = std::static_pointer_cast(batch->GetColumnByName("i32")); - return RestoreValue(arrA->Value(pos), arrB->Value(pos), arrC->Value(pos)); -} - bool CheckSorted1000(const std::shared_ptr& batch, bool desc = false) { auto arrA = std::static_pointer_cast(batch->GetColumnByName("i8")); auto arrB = std::static_pointer_cast(batch->GetColumnByName("i16")); @@ -663,38 +657,26 @@ Y_UNIT_TEST_SUITE(ArrowTest) { UNIT_ASSERT(CheckSorted1000(batch)); std::vector> batches; - batches.push_back(batch->Slice(0, 100)); // 0..100 - batches.push_back(batch->Slice(100, 200)); // 100..300 - batches.push_back(batch->Slice(200, 400)); // 200..600 - batches.push_back(batch->Slice(500, 50)); // 500..550 - batches.push_back(batch->Slice(600, 1)); // 600..601 - - auto descr = std::make_shared(batch->schema()); - descr->NotNull = true; - - std::vector> sorted; - { // maxBatchSize = 500, no limit - std::vector streams; - for (auto& batch : batches) { - streams.push_back(std::make_shared(batch)); - } + batches.push_back(batch->Slice(0, 100)); // 0..100 +100 + batches.push_back(batch->Slice(100, 200)); // 100..300 +200 + batches.push_back(batch->Slice(200, 400)); // 200..600 +300 + batches.push_back(batch->Slice(500, 50)); // 500..550 +50 + batches.push_back(batch->Slice(600, 1)); // 600..601 +1 - NArrow::IInputStream::TPtr mergeStream = - std::make_shared(streams, descr, 500); - - while (auto batch = mergeStream->Read()) { - sorted.emplace_back(batch); + std::shared_ptr sorted; + { + NArrow::NMerger::TRecordBatchBuilder builder(batch->schema()->fields()); + const std::vector vColumns = {batch->schema()->field(0)->name()}; + auto merger = std::make_shared(batch->schema(), batch->schema(), false, vColumns); + for (auto&& i : batches) { + 
merger->AddSource(i, nullptr); } + merger->DrainAll(builder); + sorted = builder.Finalize(); } - - UNIT_ASSERT_VALUES_EQUAL(sorted.size(), 2); - UNIT_ASSERT_VALUES_EQUAL(sorted[0]->num_rows(), 500); - UNIT_ASSERT_VALUES_EQUAL(sorted[1]->num_rows(), 251); - UNIT_ASSERT(CheckSorted(sorted[0])); - UNIT_ASSERT(CheckSorted(sorted[1])); - UNIT_ASSERT(NArrow::IsSorted(sorted[0], descr->SortingKey)); - UNIT_ASSERT(NArrow::IsSorted(sorted[1], descr->SortingKey)); - UNIT_ASSERT(RestoreOne(sorted[0], 499) <= RestoreOne(sorted[1], 0)); + UNIT_ASSERT_VALUES_EQUAL(sorted->num_rows(), 601); + UNIT_ASSERT(NArrow::IsSorted(sorted, batch->schema())); + UNIT_ASSERT(CheckSorted(sorted)); } Y_UNIT_TEST(MergingSortedInputStreamReversed) { @@ -702,86 +684,29 @@ Y_UNIT_TEST_SUITE(ArrowTest) { UNIT_ASSERT(CheckSorted1000(batch)); std::vector> batches; - batches.push_back(batch->Slice(0, 100)); // 0..100 - batches.push_back(batch->Slice(100, 200)); // 100..300 - batches.push_back(batch->Slice(200, 400)); // 200..600 - batches.push_back(batch->Slice(500, 50)); // 500..550 - batches.push_back(batch->Slice(600, 1)); // 600..601 - - auto descr = std::make_shared(batch->schema()); - descr->NotNull = true; - descr->Inverse(); - - std::vector> sorted; - { // maxBatchSize = 500, no limit - std::vector streams; - for (auto& batch : batches) { - streams.push_back(std::make_shared(batch)); - } - - NArrow::IInputStream::TPtr mergeStream = - std::make_shared(streams, descr, 500); + batches.push_back(batch->Slice(0, 100)); // 0..100 +100 + batches.push_back(batch->Slice(100, 200)); // 100..300 +200 + batches.push_back(batch->Slice(200, 400)); // 200..600 +300 + batches.push_back(batch->Slice(500, 50)); // 500..550 +50 + batches.push_back(batch->Slice(600, 1)); // 600..601 +1 - while (auto batch = mergeStream->Read()) { - sorted.emplace_back(batch); - } - } - - UNIT_ASSERT_VALUES_EQUAL(sorted.size(), 2); - UNIT_ASSERT_VALUES_EQUAL(sorted[0]->num_rows(), 500); - 
UNIT_ASSERT_VALUES_EQUAL(sorted[1]->num_rows(), 251); - UNIT_ASSERT(CheckSorted(sorted[0], true)); - UNIT_ASSERT(CheckSorted(sorted[1], true)); - UNIT_ASSERT(NArrow::IsSorted(sorted[0], descr->SortingKey, true)); - UNIT_ASSERT(NArrow::IsSorted(sorted[1], descr->SortingKey, true)); - UNIT_ASSERT(RestoreOne(sorted[0], 499) >= RestoreOne(sorted[1], 0)); - } - - Y_UNIT_TEST(MergingSortedInputStreamReplace) { - std::shared_ptr batch = ExtractBatch(MakeTable1000()); - UNIT_ASSERT(CheckSorted1000(batch)); - - std::vector> batches; - batches.push_back(AddSnapColumn(batch->Slice(0, 400), 0)); - batches.push_back(AddSnapColumn(batch->Slice(200, 400), 1)); - batches.push_back(AddSnapColumn(batch->Slice(400, 400), 2)); - batches.push_back(AddSnapColumn(batch->Slice(600, 400), 3)); - - auto sortingKey = batches[0]->schema(); - auto replaceKey = batch->schema(); - - auto descr = std::make_shared(sortingKey, replaceKey); - descr->Directions.back() = -1; // greater snapshot first - descr->NotNull = true; - - std::vector> sorted; + std::shared_ptr sorted; { - std::vector streams; - for (auto& batch : batches) { - streams.push_back(std::make_shared(batch)); - } - - NArrow::IInputStream::TPtr mergeStream = - std::make_shared(streams, descr, 5000); - - while (auto batch = mergeStream->Read()) { - sorted.emplace_back(batch); + NArrow::NMerger::TRecordBatchBuilder builder(batch->schema()->fields()); + const std::vector vColumns = {batch->schema()->field(0)->name()}; + auto merger = std::make_shared(batch->schema(), batch->schema(), true, vColumns); + for (auto&& i : batches) { + merger->AddSource(i, nullptr); } + merger->DrainAll(builder); + sorted = builder.Finalize(); } - - UNIT_ASSERT_VALUES_EQUAL(sorted.size(), 1); - UNIT_ASSERT_VALUES_EQUAL(sorted[0]->num_rows(), 1000); - UNIT_ASSERT(CheckSorted1000(sorted[0])); - UNIT_ASSERT(NArrow::IsSortedAndUnique(sorted[0], descr->SortingKey)); - - auto counts = CountValues(std::static_pointer_cast(sorted[0]->GetColumnByName("snap"))); - 
UNIT_ASSERT_VALUES_EQUAL(counts[0], 200); - UNIT_ASSERT_VALUES_EQUAL(counts[1], 200); - UNIT_ASSERT_VALUES_EQUAL(counts[2], 200); - UNIT_ASSERT_VALUES_EQUAL(counts[3], 400); + UNIT_ASSERT_VALUES_EQUAL(sorted->num_rows(), 601); + UNIT_ASSERT(NArrow::IsSorted(sorted, batch->schema(), true)); + UNIT_ASSERT(CheckSorted(sorted, true)); } - Y_UNIT_TEST(MergingSortedInputStreamReplaceReversed) { + Y_UNIT_TEST(MergingSortedInputStreamReplace) { std::shared_ptr batch = ExtractBatch(MakeTable1000()); UNIT_ASSERT(CheckSorted1000(batch)); @@ -791,35 +716,23 @@ Y_UNIT_TEST_SUITE(ArrowTest) { batches.push_back(AddSnapColumn(batch->Slice(400, 400), 2)); batches.push_back(AddSnapColumn(batch->Slice(600, 400), 3)); - auto sortingKey = batches[0]->schema(); - auto replaceKey = batch->schema(); - - auto descr = std::make_shared(sortingKey, replaceKey); - descr->Directions.back() = 1; // greater snapshot last - descr->NotNull = true; - descr->Inverse(); - - std::vector> sorted; + std::shared_ptr sorted; { - std::vector streams; - for (auto& batch : batches) { - streams.push_back(std::make_shared(batch)); - } - - NArrow::IInputStream::TPtr mergeStream = - std::make_shared(streams, descr, 5000); - - while (auto batch = mergeStream->Read()) { - sorted.emplace_back(batch); + NArrow::NMerger::TRecordBatchBuilder builder(batches[0]->schema()->fields()); + const std::vector vColumns = {"snap"}; + auto merger = std::make_shared(batch->schema(), batches[0]->schema(), false, vColumns); + for (auto&& i : batches) { + merger->AddSource(i, nullptr); } + merger->DrainAll(builder); + sorted = builder.Finalize(); } - UNIT_ASSERT_VALUES_EQUAL(sorted.size(), 1); - UNIT_ASSERT_VALUES_EQUAL(sorted[0]->num_rows(), 1000); - UNIT_ASSERT(CheckSorted1000(sorted[0], true)); - UNIT_ASSERT(NArrow::IsSortedAndUnique(sorted[0], descr->SortingKey, true)); + UNIT_ASSERT_VALUES_EQUAL(sorted->num_rows(), 1000); + UNIT_ASSERT(CheckSorted1000(sorted)); + UNIT_ASSERT(NArrow::IsSortedAndUnique(sorted, batch->schema())); - 
auto counts = CountValues(std::static_pointer_cast(sorted[0]->GetColumnByName("snap"))); + auto counts = CountValues(std::static_pointer_cast(sorted->GetColumnByName("snap"))); UNIT_ASSERT_VALUES_EQUAL(counts[0], 200); UNIT_ASSERT_VALUES_EQUAL(counts[1], 200); UNIT_ASSERT_VALUES_EQUAL(counts[2], 200); diff --git a/ydb/core/formats/arrow/ut_program_step.cpp b/ydb/core/formats/arrow/ut/ut_program_step.cpp similarity index 99% rename from ydb/core/formats/arrow/ut_program_step.cpp rename to ydb/core/formats/arrow/ut/ut_program_step.cpp index 2ab52ed29bdc..d7f447a1b237 100644 --- a/ydb/core/formats/arrow/ut_program_step.cpp +++ b/ydb/core/formats/arrow/ut/ut_program_step.cpp @@ -2,14 +2,16 @@ #include #include +#include +#include +#include +#include + +#include + #include #include #include -#include -#include -#include "custom_registry.h" -#include "program.h" -#include "arrow_helpers.h" using namespace NKikimr::NArrow; using namespace NKikimr::NSsa; diff --git a/ydb/core/formats/arrow/ut/ut_size_calcer.cpp b/ydb/core/formats/arrow/ut/ut_size_calcer.cpp index 24d2c52d9217..1db712f43c7a 100644 --- a/ydb/core/formats/arrow/ut/ut_size_calcer.cpp +++ b/ydb/core/formats/arrow/ut/ut_size_calcer.cpp @@ -16,6 +16,12 @@ Y_UNIT_TEST_SUITE(SizeCalcer) { std::shared_ptr batch = NConstruction::TRecordBatchConstructor({ column }).BuildBatch(2048); Cerr << GetBatchDataSize(batch) << Endl; UNIT_ASSERT(GetBatchDataSize(batch) == 2048 * 512 + 2048 * 4); + auto slice05 = batch->Slice(batch->num_rows() / 2, batch->num_rows() / 2); + Cerr << GetBatchDataSize(slice05) << Endl; + UNIT_ASSERT(GetBatchDataSize(slice05) == 0.5 * (2048 * 512 + 2048 * 4)); + auto slice025 = slice05->Slice(slice05->num_rows() / 3, slice05->num_rows() / 2); + Cerr << GetBatchDataSize(slice025) << Endl; + UNIT_ASSERT(GetBatchDataSize(slice025) == 0.25 * (2048 * 512 + 2048 * 4)); } Y_UNIT_TEST(DictionaryStrings) { diff --git a/ydb/core/formats/arrow/ya.make b/ydb/core/formats/arrow/ya.make index 
146c5bae1e86..d55ccb5799e5 100644 --- a/ydb/core/formats/arrow/ya.make +++ b/ydb/core/formats/arrow/ya.make @@ -45,14 +45,10 @@ SRCS( converter.h custom_registry.cpp input_stream.h - merging_sorted_input_stream.cpp - merging_sorted_input_stream.h - one_batch_input_stream.h permutations.cpp program.cpp replace_key.cpp size_calcer.cpp - sort_cursor.h ssa_program_optimizer.cpp special_keys.cpp simple_arrays_cache.cpp diff --git a/ydb/core/grpc_services/rpc_load_rows.cpp b/ydb/core/grpc_services/rpc_load_rows.cpp index 05cc85065298..f1d17adef38e 100644 --- a/ydb/core/grpc_services/rpc_load_rows.cpp +++ b/ydb/core/grpc_services/rpc_load_rows.cpp @@ -462,8 +462,12 @@ class TUploadColumnsRPCPublic : public NTxProxy::TUploadRowsBaseSetSkipRows(skipRows); if (!delimiter.empty()) { if (delimiter.size() != 1) { @@ -471,20 +475,20 @@ class TUploadColumnsRPCPublic : public NTxProxy::TUploadRowsBaseSetDelimiter(delimiter[0]); } if (!nullValue.empty()) { - reader.SetNullValue(nullValue); + reader->SetNullValue(nullValue); } if (data.size() > NFormats::TArrowCSV::DEFAULT_BLOCK_SIZE) { ui32 blockSize = NFormats::TArrowCSV::DEFAULT_BLOCK_SIZE; blockSize *= data.size() / blockSize + 1; - reader.SetBlockSize(blockSize); + reader->SetBlockSize(blockSize); } - Batch = reader.ReadSingleBatch(data, errorMessage); + Batch = reader->ReadSingleBatch(data, errorMessage); if (!Batch) { return false; } diff --git a/ydb/core/io_formats/arrow/csv_arrow.cpp b/ydb/core/io_formats/arrow/csv_arrow.cpp index 06b070d76db2..36113047b92f 100644 --- a/ydb/core/io_formats/arrow/csv_arrow.cpp +++ b/ydb/core/io_formats/arrow/csv_arrow.cpp @@ -5,6 +5,7 @@ #include #include +#include namespace NKikimr::NFormats { @@ -42,7 +43,30 @@ class TimestampIntParser: public arrow::TimestampParser { } -TArrowCSV::TArrowCSV(const TVector>& columns, bool header, const std::set& notNullColumns) +arrow::Result TArrowCSV::Create(const TVector>& columns, bool header, const std::set& notNullColumns) { + TVector errors; + 
TColummns convertedColumns; + convertedColumns.reserve(columns.size()); + for (auto& [name, type] : columns) { + const auto arrowType = NArrow::GetArrowType(type); + if (!arrowType.ok()) { + errors.emplace_back("column " + name + ": " + arrowType.status().ToString()); + continue; + } + const auto csvArrowType = NArrow::GetCSVArrowType(type); + if (!csvArrowType.ok()) { + errors.emplace_back("column " + name + ": " + csvArrowType.status().ToString()); + continue; + } + convertedColumns.emplace_back(TColumnInfo{name, *arrowType, *csvArrowType}); + } + if (!errors.empty()) { + return arrow::Status::TypeError(ErrorPrefix() + "columns errors: " + JoinSeq("; ", errors)); + } + return TArrowCSV(convertedColumns, header, notNullColumns); +} + +TArrowCSV::TArrowCSV(const TColummns& columns, bool header, const std::set& notNullColumns) : ReadOptions(arrow::csv::ReadOptions::Defaults()) , ParseOptions(arrow::csv::ParseOptions::Defaults()) , ConvertOptions(arrow::csv::ConvertOptions::Defaults()) @@ -60,21 +84,19 @@ TArrowCSV::TArrowCSV(const TVector>& colu // !autogenerate + column_names.empty() => read from CSV ResultColumns.reserve(columns.size()); - for (auto& [name, type] : columns) { - ResultColumns.push_back(name); - std::string columnName(name.data(), name.size()); - ConvertOptions.column_types[columnName] = NArrow::GetCSVArrowType(type); - OriginalColumnTypes[columnName] = NArrow::GetArrowType(type); + for (const auto& col: columns) { + ResultColumns.push_back(col.Name); + ConvertOptions.column_types[col.Name] = col.CsvArrowType; + OriginalColumnTypes[col.Name] = col.ArrowType; } } else if (!columns.empty()) { // !autogenerate + !column_names.empty() => specified columns ReadOptions.column_names.reserve(columns.size()); - for (auto& [name, type] : columns) { - std::string columnName(name.data(), name.size()); - ReadOptions.column_names.push_back(columnName); - ConvertOptions.column_types[columnName] = NArrow::GetCSVArrowType(type); - OriginalColumnTypes[columnName] = 
NArrow::GetArrowType(type); + for (const auto& col: columns) { + ReadOptions.column_names.push_back(col.Name); + ConvertOptions.column_types[col.Name] = col.CsvArrowType; + OriginalColumnTypes[col.Name] = col.ArrowType; } #if 0 } else { diff --git a/ydb/core/io_formats/arrow/csv_arrow.h b/ydb/core/io_formats/arrow/csv_arrow.h index a4e5b912f664..49fd0cd84c62 100644 --- a/ydb/core/io_formats/arrow/csv_arrow.h +++ b/ydb/core/io_formats/arrow/csv_arrow.h @@ -13,7 +13,7 @@ class TArrowCSV { /// If header is true read column names from first line after skipRows. Parse columns as strings in this case. /// @note It's possible to skip header with skipRows and use typed columns instead. - TArrowCSV(const TVector>& columns, bool header = false, const std::set& notNullColumns = {}); + static arrow::Result Create(const TVector>& columns, bool header = false, const std::set& notNullColumns = {}); std::shared_ptr ReadNext(const TString& csv, TString& errString); std::shared_ptr ReadSingleBatch(const TString& csv, TString& errString); @@ -50,6 +50,13 @@ class TArrowCSV { void SetNullValue(const TString& null = ""); private: + struct TColumnInfo { + TString Name; + std::shared_ptr ArrowType; + std::shared_ptrCsvArrowType; + }; + using TColummns = TVector; + TArrowCSV(const TColummns& columns, bool header, const std::set& notNullColumns); arrow::csv::ReadOptions ReadOptions; arrow::csv::ParseOptions ParseOptions; arrow::csv::ConvertOptions ConvertOptions; diff --git a/ydb/core/io_formats/arrow/csv_arrow_ut.cpp b/ydb/core/io_formats/arrow/csv_arrow_ut.cpp index 5716126eb83e..aa37ebbdbc2a 100644 --- a/ydb/core/io_formats/arrow/csv_arrow_ut.cpp +++ b/ydb/core/io_formats/arrow/csv_arrow_ut.cpp @@ -53,7 +53,9 @@ TestReadSingleBatch(TArrowCSV& reader, for (size_t i = 0; i < columns.size(); ++i) { UNIT_ASSERT_EQUAL(columns[i].first, batch->schema()->field(i)->name()); - UNIT_ASSERT(NArrow::GetArrowType(columns[i].second)->Equals(batch->schema()->field(i)->type())); + auto arrowType = 
NArrow::GetArrowType(columns[i].second); + UNIT_ASSERT_C(arrowType.ok(), arrowType.status().ToString()); + UNIT_ASSERT(arrowType.ValueUnsafe()->Equals(batch->schema()->field(i)->type())); // TODO: check data } return batch; @@ -62,16 +64,17 @@ TestReadSingleBatch(TArrowCSV& reader, std::shared_ptr TestReadSingleBatch(const TVector>& columns, const TString& data, char delimiter, bool header, ui32 numRows, ui32 skipRows = 0, std::optional escape = {}) { - TArrowCSV reader(columns, header); - reader.SetDelimiter(delimiter); + auto reader = TArrowCSV::Create(columns, header); + UNIT_ASSERT_C(reader.ok(), reader.status().ToString()); + reader->SetDelimiter(delimiter); if (skipRows) { - reader.SetSkipRows(skipRows); + reader->SetSkipRows(skipRows); } if (escape) { - reader.SetEscaping(true, *escape); + reader->SetEscaping(true, *escape); } - return TestReadSingleBatch(reader, columns, data, numRows); + return TestReadSingleBatch(*reader, columns, data, numRows); } } @@ -95,10 +98,11 @@ Y_UNIT_TEST_SUITE(FormatCSV) { }; TInstant dtInstant; Y_ABORT_UNLESS(TInstant::TryParseIso8601(dateTimeString, dtInstant)); - TArrowCSV reader(columns, false); + auto reader = TArrowCSV::Create(columns, false); + UNIT_ASSERT_C(reader.ok(), reader.status().ToString()); TString errorMessage; - auto batch = reader.ReadNext(data, errorMessage); + auto batch = reader->ReadNext(data, errorMessage); Cerr << errorMessage << "\n"; UNIT_ASSERT(!!batch); UNIT_ASSERT(errorMessage.empty()); @@ -155,10 +159,11 @@ Y_UNIT_TEST_SUITE(FormatCSV) { TVector> columns; { - TArrowCSV reader(columns, false); + auto reader = TArrowCSV::Create(columns, false); + UNIT_ASSERT_C(reader.ok(), reader.status().ToString()); TString errorMessage; - auto batch = reader.ReadNext(data, errorMessage); + auto batch = reader->ReadNext(data, errorMessage); Cerr << errorMessage << "\n"; UNIT_ASSERT(!batch); UNIT_ASSERT(!errorMessage.empty()); @@ -170,10 +175,11 @@ Y_UNIT_TEST_SUITE(FormatCSV) { {"i64", 
NScheme::TTypeInfo(NScheme::NTypeIds::Int64)} }; - TArrowCSV reader(columns, false); + auto reader = TArrowCSV::Create(columns, false); + UNIT_ASSERT_C(reader.ok(), reader.status().ToString()); TString errorMessage; - auto batch = reader.ReadNext(data, errorMessage); + auto batch = reader->ReadNext(data, errorMessage); Cerr << errorMessage << "\n"; UNIT_ASSERT(!batch); UNIT_ASSERT(!errorMessage.empty()); @@ -291,14 +297,15 @@ Y_UNIT_TEST_SUITE(FormatCSV) { csv += TString() + null + delimiter + q + null + q + delimiter + q + null + q + endLine; csv += TString() + null + delimiter + null + delimiter + null + endLine; - TArrowCSV reader(columns, false); + auto reader = TArrowCSV::Create(columns, false); + UNIT_ASSERT_C(reader.ok(), reader.status().ToString()); if (!nulls.empty() || !defaultNull) { - reader.SetNullValue(null); + reader->SetNullValue(null); } else { defaultNull = false; } - auto batch = TestReadSingleBatch(reader, columns, csv, 3); + auto batch = TestReadSingleBatch(*reader, columns, csv, 3); Cerr << "src:\n" << csv; diff --git a/ydb/core/kqp/compute_actor/kqp_compute_events.h b/ydb/core/kqp/compute_actor/kqp_compute_events.h index f31f946d28fd..7516952c9437 100644 --- a/ydb/core/kqp/compute_actor/kqp_compute_events.h +++ b/ydb/core/kqp/compute_actor/kqp_compute_events.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include #include @@ -40,7 +41,7 @@ struct TEvKqpCompute { ui32 ScanId; ui32 Generation; TVector Rows; - std::shared_ptr ArrowBatch; + std::shared_ptr ArrowBatch; std::vector> SplittedBatches; TOwnedCellVec LastKey; @@ -123,7 +124,7 @@ struct TEvKqpCompute { if (pbEv->Record.HasArrowBatch()) { auto batch = pbEv->Record.GetArrowBatch(); auto schema = NArrow::DeserializeSchema(batch.GetSchema()); - ev->ArrowBatch = NArrow::DeserializeBatch(batch.GetBatch(), schema); + ev->ArrowBatch = NArrow::TStatusValidator::GetValid(arrow::Table::FromRecordBatches({NArrow::DeserializeBatch(batch.GetBatch(), schema)})); } return ev.Release(); } @@ 
-160,7 +161,7 @@ struct TEvKqpCompute { Y_DEBUG_ABORT_UNLESS(ArrowBatch != nullptr); auto* protoArrowBatch = Remote->Record.MutableArrowBatch(); protoArrowBatch->SetSchema(NArrow::SerializeSchema(*ArrowBatch->schema())); - protoArrowBatch->SetBatch(NArrow::SerializeBatchNoCompression(ArrowBatch)); + protoArrowBatch->SetBatch(NArrow::SerializeBatchNoCompression(NArrow::ToBatch(ArrowBatch, true))); break; } } diff --git a/ydb/core/kqp/compute_actor/kqp_scan_compute_manager.h b/ydb/core/kqp/compute_actor/kqp_scan_compute_manager.h index c7a57d58d3e8..a88ac5736055 100644 --- a/ydb/core/kqp/compute_actor/kqp_scan_compute_manager.h +++ b/ydb/core/kqp/compute_actor/kqp_scan_compute_manager.h @@ -249,17 +249,6 @@ class TInFlightShards: public NComputeActor::TScanShardsStatistics { const IExternalObjectsProvider& ExternalObjectsProvider; public: - bool RestartScanner(TShardState& state) { - StopScanner(state.TabletId, false); - state.ResetRetry(); - static constexpr ui64 MAX_SHARD_RETRIES = 5; // retry after: 0, 250, 500, 1000, 2000 - if (++state.TotalRetries >= MAX_SHARD_RETRIES) { - return false; - } - StartScanner(state); - return true; - } - void AbortAllScanners(const TString& errorMessage) { for (auto&& itTablet : ShardScanners) { itTablet.second->Stop(true, errorMessage); diff --git a/ydb/core/kqp/compute_actor/kqp_scan_events.h b/ydb/core/kqp/compute_actor/kqp_scan_events.h index cfff8c3ad4dc..af455be7b7e7 100644 --- a/ydb/core/kqp/compute_actor/kqp_scan_events.h +++ b/ydb/core/kqp/compute_actor/kqp_scan_events.h @@ -39,7 +39,7 @@ struct TEvScanExchange { class TEvSendData: public NActors::TEventLocal { private: - YDB_READONLY_DEF(std::shared_ptr, ArrowBatch); + YDB_READONLY_DEF(std::shared_ptr, ArrowBatch); YDB_ACCESSOR_DEF(TVector, Rows); YDB_READONLY(ui64, TabletId, 0); YDB_ACCESSOR_DEF(std::vector, DataIndexes); @@ -48,7 +48,7 @@ struct TEvScanExchange { return ArrowBatch ? 
ArrowBatch->num_rows() : Rows.size(); } - TEvSendData(const std::shared_ptr& arrowBatch, const ui64 tabletId) + TEvSendData(const std::shared_ptr& arrowBatch, const ui64 tabletId) : ArrowBatch(arrowBatch) , TabletId(tabletId) { @@ -56,7 +56,7 @@ struct TEvScanExchange { Y_ABORT_UNLESS(ArrowBatch->num_rows()); } - TEvSendData(const std::shared_ptr& arrowBatch, const ui64 tabletId, std::vector&& dataIndexes) + TEvSendData(const std::shared_ptr& arrowBatch, const ui64 tabletId, std::vector&& dataIndexes) : ArrowBatch(arrowBatch) , TabletId(tabletId) , DataIndexes(std::move(dataIndexes)) diff --git a/ydb/core/kqp/compute_actor/kqp_scan_fetcher_actor.cpp b/ydb/core/kqp/compute_actor/kqp_scan_fetcher_actor.cpp index 5d29c47c9260..b4d23bbfb930 100644 --- a/ydb/core/kqp/compute_actor/kqp_scan_fetcher_actor.cpp +++ b/ydb/core/kqp/compute_actor/kqp_scan_fetcher_actor.cpp @@ -170,13 +170,10 @@ void TKqpScanFetcherActor::HandleExecute(TEvKqpCompute::TEvScanError::TPtr& ev) } if (state->State == EShardState::PostRunning || state->State == EShardState::Running) { - ++TotalRetries; - if (!InFlightShards.RestartScanner(*state) || TotalRetries >= MAX_TOTAL_SHARD_RETRIES) { - CA_LOG_E("TKqpScanFetcherActor: broken tablet for this request " << state->TabletId - << ", retries limit exceeded (" << state->TotalRetries << "/" << TotalRetries << ")"); - SendGlobalFail(NDqProto::COMPUTE_STATE_FAILURE, YdbStatusToDqStatus(status), issues); - return PassAway(); - } + CA_LOG_E("TKqpScanFetcherActor: broken tablet for this request " << state->TabletId + << ", retries limit exceeded (" << state->TotalRetries << "/" << TotalRetries << ")"); + SendGlobalFail(NDqProto::COMPUTE_STATE_FAILURE, YdbStatusToDqStatus(status), issues); + return PassAway(); } } diff --git a/ydb/core/kqp/gateway/behaviour/tablestore/operations/add_column.cpp b/ydb/core/kqp/gateway/behaviour/tablestore/operations/add_column.cpp index 0353207940b7..aeec2c01f225 100644 --- 
a/ydb/core/kqp/gateway/behaviour/tablestore/operations/add_column.cpp +++ b/ydb/core/kqp/gateway/behaviour/tablestore/operations/add_column.cpp @@ -11,6 +11,10 @@ TConclusionStatus TAddColumnOperation::DoDeserialize(NYql::TObjectSettingsImpl:: } ColumnName = *fValue; } + StorageId = features.Extract("STORAGE_ID"); + if (StorageId && !*StorageId) { + return TConclusionStatus::Fail("STORAGE_ID cannot be empty string"); + } { auto fValue = features.Extract("TYPE"); if (!fValue) { @@ -31,6 +35,9 @@ void TAddColumnOperation::DoSerializeScheme(NKikimrSchemeOp::TAlterColumnTableSc auto column = schemaData.AddAddColumns(); column->SetName(ColumnName); column->SetType(ColumnType); + if (StorageId) { + column->SetStorageId(*StorageId); + } column->SetNotNull(NotNull); } diff --git a/ydb/core/kqp/gateway/behaviour/tablestore/operations/add_column.h b/ydb/core/kqp/gateway/behaviour/tablestore/operations/add_column.h index 6bf452c9f860..a78207845dea 100644 --- a/ydb/core/kqp/gateway/behaviour/tablestore/operations/add_column.h +++ b/ydb/core/kqp/gateway/behaviour/tablestore/operations/add_column.h @@ -12,6 +12,7 @@ class TAddColumnOperation : public ITableStoreOperation { private: TString ColumnName; TString ColumnType; + std::optional StorageId; bool NotNull = false; public: TConclusionStatus DoDeserialize(NYql::TObjectSettingsImpl::TFeaturesExtractor& features) override; diff --git a/ydb/core/kqp/gateway/behaviour/tablestore/operations/alter_column.cpp b/ydb/core/kqp/gateway/behaviour/tablestore/operations/alter_column.cpp index c3b65e981194..bd430d71fc47 100644 --- a/ydb/core/kqp/gateway/behaviour/tablestore/operations/alter_column.cpp +++ b/ydb/core/kqp/gateway/behaviour/tablestore/operations/alter_column.cpp @@ -10,6 +10,10 @@ TConclusionStatus TAlterColumnOperation::DoDeserialize(NYql::TObjectSettingsImpl } ColumnName = *fValue; } + StorageId = features.Extract("STORAGE_ID"); + if (StorageId && !*StorageId) { + return TConclusionStatus::Fail("STORAGE_ID cannot be empty 
string"); + } { auto result = DictionaryEncodingDiff.DeserializeFromRequestFeatures(features); if (!result) { @@ -28,6 +32,9 @@ TConclusionStatus TAlterColumnOperation::DoDeserialize(NYql::TObjectSettingsImpl void TAlterColumnOperation::DoSerializeScheme(NKikimrSchemeOp::TAlterColumnTableSchema& schemaData) const { auto* column = schemaData.AddAlterColumns(); column->SetName(ColumnName); + if (StorageId && !!*StorageId) { + column->SetStorageId(*StorageId); + } if (!!Serializer) { Serializer.SerializeToProto(*column->MutableSerializer()); } diff --git a/ydb/core/kqp/gateway/behaviour/tablestore/operations/alter_column.h b/ydb/core/kqp/gateway/behaviour/tablestore/operations/alter_column.h index 81c0e362be3d..c883ab035f9d 100644 --- a/ydb/core/kqp/gateway/behaviour/tablestore/operations/alter_column.h +++ b/ydb/core/kqp/gateway/behaviour/tablestore/operations/alter_column.h @@ -13,6 +13,7 @@ class TAlterColumnOperation : public ITableStoreOperation { static inline auto Registrator = TFactory::TRegistrator(GetTypeName()); TString ColumnName; + std::optional StorageId; NArrow::NSerialization::TSerializerContainer Serializer; NArrow::NDictionary::TEncodingDiff DictionaryEncodingDiff; diff --git a/ydb/core/kqp/gateway/behaviour/tablestore/operations/drop_stat.cpp b/ydb/core/kqp/gateway/behaviour/tablestore/operations/drop_stat.cpp new file mode 100644 index 000000000000..94a18e7e4140 --- /dev/null +++ b/ydb/core/kqp/gateway/behaviour/tablestore/operations/drop_stat.cpp @@ -0,0 +1,21 @@ +#include "drop_stat.h" +#include + +namespace NKikimr::NKqp { + +TConclusionStatus TDropStatOperation::DoDeserialize(NYql::TObjectSettingsImpl::TFeaturesExtractor& features) { + { + auto fValue = features.Extract("NAME"); + if (!fValue) { + return TConclusionStatus::Fail("can't find parameter NAME"); + } + Name = *fValue; + } + return TConclusionStatus::Success(); +} + +void TDropStatOperation::DoSerializeScheme(NKikimrSchemeOp::TAlterColumnTableSchema& schemaData) const { + 
*schemaData.AddDropStatistics() = Name; +} + +} diff --git a/ydb/core/kqp/gateway/behaviour/tablestore/operations/drop_stat.h b/ydb/core/kqp/gateway/behaviour/tablestore/operations/drop_stat.h new file mode 100644 index 000000000000..777aae036858 --- /dev/null +++ b/ydb/core/kqp/gateway/behaviour/tablestore/operations/drop_stat.h @@ -0,0 +1,19 @@ +#include "abstract.h" + +namespace NKikimr::NKqp { + +class TDropStatOperation : public ITableStoreOperation { + static TString GetTypeName() { + return "DROP_STAT"; + } + + static inline auto Registrator = TFactory::TRegistrator(GetTypeName()); +private: + TString Name; +public: + TConclusionStatus DoDeserialize(NYql::TObjectSettingsImpl::TFeaturesExtractor& features) override; + void DoSerializeScheme(NKikimrSchemeOp::TAlterColumnTableSchema& schemaData) const override; +}; + +} + diff --git a/ydb/core/kqp/gateway/behaviour/tablestore/operations/upsert_index.cpp b/ydb/core/kqp/gateway/behaviour/tablestore/operations/upsert_index.cpp index 61914cb6e005..ae0f08e3333d 100644 --- a/ydb/core/kqp/gateway/behaviour/tablestore/operations/upsert_index.cpp +++ b/ydb/core/kqp/gateway/behaviour/tablestore/operations/upsert_index.cpp @@ -12,6 +12,10 @@ TConclusionStatus TUpsertIndexOperation::DoDeserialize(NYql::TObjectSettingsImpl } IndexName = *fValue; } + StorageId = features.Extract("STORAGE_ID"); + if (StorageId && !*StorageId) { + return TConclusionStatus::Fail("STORAGE_ID cannot be empty string"); + } TString indexType; { auto fValue = features.Extract("TYPE"); @@ -42,6 +46,9 @@ TConclusionStatus TUpsertIndexOperation::DoDeserialize(NYql::TObjectSettingsImpl void TUpsertIndexOperation::DoSerializeScheme(NKikimrSchemeOp::TAlterColumnTableSchema& schemaData) const { auto* indexProto = schemaData.AddUpsertIndexes(); + if (StorageId) { + indexProto->SetStorageId(*StorageId); + } indexProto->SetName(IndexName); IndexMetaConstructor.SerializeToProto(*indexProto); } diff --git 
a/ydb/core/kqp/gateway/behaviour/tablestore/operations/upsert_index.h b/ydb/core/kqp/gateway/behaviour/tablestore/operations/upsert_index.h index 267829a1a5f4..12305f85f0ae 100644 --- a/ydb/core/kqp/gateway/behaviour/tablestore/operations/upsert_index.h +++ b/ydb/core/kqp/gateway/behaviour/tablestore/operations/upsert_index.h @@ -12,6 +12,7 @@ class TUpsertIndexOperation : public ITableStoreOperation { static inline auto Registrator = TFactory::TRegistrator(GetTypeName()); private: TString IndexName; + std::optional StorageId; NBackgroundTasks::TInterfaceProtoContainer IndexMetaConstructor; public: TConclusionStatus DoDeserialize(NYql::TObjectSettingsImpl::TFeaturesExtractor& features) override; diff --git a/ydb/core/kqp/gateway/behaviour/tablestore/operations/upsert_opt.cpp b/ydb/core/kqp/gateway/behaviour/tablestore/operations/upsert_opt.cpp new file mode 100644 index 000000000000..e3474723d5aa --- /dev/null +++ b/ydb/core/kqp/gateway/behaviour/tablestore/operations/upsert_opt.cpp @@ -0,0 +1,20 @@ +#include "upsert_opt.h" +#include +#include + +namespace NKikimr::NKqp { + +TConclusionStatus TUpsertOptionsOperation::DoDeserialize(NYql::TObjectSettingsImpl::TFeaturesExtractor& features) { + auto value = features.Extract("SCHEME_NEED_ACTUALIZATION", false); + if (!value) { + return TConclusionStatus::Fail("Incorrect value for SCHEME_NEED_ACTUALIZATION: cannot parse as boolean"); + } + SchemeNeedActualization = *value; + return TConclusionStatus::Success(); +} + +void TUpsertOptionsOperation::DoSerializeScheme(NKikimrSchemeOp::TAlterColumnTableSchema& schemaData) const { + schemaData.MutableOptions()->SetSchemeNeedActualization(SchemeNeedActualization); +} + +} diff --git a/ydb/core/kqp/gateway/behaviour/tablestore/operations/upsert_opt.h b/ydb/core/kqp/gateway/behaviour/tablestore/operations/upsert_opt.h new file mode 100644 index 000000000000..e0fdbe002ebd --- /dev/null +++ b/ydb/core/kqp/gateway/behaviour/tablestore/operations/upsert_opt.h @@ -0,0 +1,22 @@ +#include
"abstract.h" +#include + +namespace NKikimr::NKqp { + +class TUpsertOptionsOperation : public ITableStoreOperation { +private: + static TString GetTypeName() { + return "UPSERT_OPTIONS"; + } + + static inline const auto Registrator = TFactory::TRegistrator(GetTypeName()); +private: + bool SchemeNeedActualization = false; +public: + TConclusionStatus DoDeserialize(NYql::TObjectSettingsImpl::TFeaturesExtractor& features) override; + + void DoSerializeScheme(NKikimrSchemeOp::TAlterColumnTableSchema& schemaData) const override; +}; + +} + diff --git a/ydb/core/kqp/gateway/behaviour/tablestore/operations/upsert_stat.cpp b/ydb/core/kqp/gateway/behaviour/tablestore/operations/upsert_stat.cpp new file mode 100644 index 000000000000..9e8360dd5e35 --- /dev/null +++ b/ydb/core/kqp/gateway/behaviour/tablestore/operations/upsert_stat.cpp @@ -0,0 +1,49 @@ +#include "upsert_stat.h" +#include +#include + +namespace NKikimr::NKqp { + +TConclusionStatus TUpsertStatOperation::DoDeserialize(NYql::TObjectSettingsImpl::TFeaturesExtractor& features) { + { + auto fValue = features.Extract("NAME"); + if (!fValue) { + return TConclusionStatus::Fail("can't find alter parameter NAME"); + } + Name = *fValue; + } + TString type; + { + auto fValue = features.Extract("TYPE"); + if (!fValue) { + return TConclusionStatus::Fail("can't find alter parameter TYPE"); + } + type = *fValue; + } + { + auto fValue = features.Extract("FEATURES"); + if (!fValue) { + return TConclusionStatus::Fail("can't find alter parameter FEATURES"); + } + if (!Constructor.Initialize(type)) { + return TConclusionStatus::Fail("can't initialize stat constructor object for type \"" + type + "\""); + } + NJson::TJsonValue jsonData; + if (!NJson::ReadJsonFastTree(*fValue, &jsonData)) { + return TConclusionStatus::Fail("incorrect json in request FEATURES parameter"); + } + auto result = Constructor->DeserializeFromJson(jsonData); + if (result.IsFail()) { + return result; + } + } + return TConclusionStatus::Success(); +} + +void 
TUpsertStatOperation::DoSerializeScheme(NKikimrSchemeOp::TAlterColumnTableSchema& schemaData) const { + auto* proto = schemaData.AddUpsertStatistics(); + proto->SetName(Name); + Constructor.SerializeToProto(*proto); +} + +} diff --git a/ydb/core/kqp/gateway/behaviour/tablestore/operations/upsert_stat.h b/ydb/core/kqp/gateway/behaviour/tablestore/operations/upsert_stat.h new file mode 100644 index 000000000000..5d8abdffae8d --- /dev/null +++ b/ydb/core/kqp/gateway/behaviour/tablestore/operations/upsert_stat.h @@ -0,0 +1,23 @@ +#include "abstract.h" +#include + +namespace NKikimr::NKqp { + +class TUpsertStatOperation : public ITableStoreOperation { +private: + static TString GetTypeName() { + return "UPSERT_STAT"; + } + + static inline const auto Registrator = TFactory::TRegistrator(GetTypeName()); +private: + TString Name; + NOlap::NStatistics::TConstructorContainer Constructor; +public: + TConclusionStatus DoDeserialize(NYql::TObjectSettingsImpl::TFeaturesExtractor& features) override; + + void DoSerializeScheme(NKikimrSchemeOp::TAlterColumnTableSchema& schemaData) const override; +}; + +} + diff --git a/ydb/core/kqp/gateway/behaviour/tablestore/operations/ya.make b/ydb/core/kqp/gateway/behaviour/tablestore/operations/ya.make index 3301f543b8f6..17d70abf65f2 100644 --- a/ydb/core/kqp/gateway/behaviour/tablestore/operations/ya.make +++ b/ydb/core/kqp/gateway/behaviour/tablestore/operations/ya.make @@ -7,11 +7,15 @@ SRCS( GLOBAL drop_column.cpp GLOBAL upsert_index.cpp GLOBAL drop_index.cpp + GLOBAL upsert_stat.cpp + GLOBAL drop_stat.cpp + GLOBAL upsert_opt.cpp ) PEERDIR( ydb/services/metadata/manager ydb/core/formats/arrow/serializer + ydb/core/tx/columnshard/engines/scheme/statistics/abstract ydb/core/kqp/gateway/utils ydb/core/protos ) diff --git a/ydb/core/kqp/runtime/kqp_scan_data.cpp b/ydb/core/kqp/runtime/kqp_scan_data.cpp index 7e8ef778f752..9ac41c01f221 100644 --- a/ydb/core/kqp/runtime/kqp_scan_data.cpp +++ b/ydb/core/kqp/runtime/kqp_scan_data.cpp @@ -3,6 
+3,8 @@ #include #include #include +#include +#include #include #include @@ -210,11 +212,11 @@ template (array.Value(rowIndex))); } - static void Validate(TArrayType& /*array*/) { + static void Validate(const TArrayType& /*array*/) { } @@ -227,13 +229,13 @@ template <> class TElementAccessor { public: using TArrayType = arrow::Decimal128Array; - static void Validate(arrow::Decimal128Array& array) { + static void Validate(const arrow::Decimal128Array& array) { const auto& type = arrow::internal::checked_cast(*array.type()); YQL_ENSURE(type.precision() == NScheme::DECIMAL_PRECISION, "Unsupported Decimal precision."); YQL_ENSURE(type.scale() == NScheme::DECIMAL_SCALE, "Unsupported Decimal scale."); } - static NYql::NUdf::TUnboxedValue ExtractValue(arrow::Decimal128Array& array, const ui32 rowIndex) { + static NYql::NUdf::TUnboxedValue ExtractValue(const arrow::Decimal128Array& array, const ui32 rowIndex) { auto data = array.GetView(rowIndex); YQL_ENSURE(data.size() == sizeof(NYql::NDecimal::TInt128), "Wrong data size"); NYql::NDecimal::TInt128 val; @@ -249,10 +251,10 @@ template <> class TElementAccessor { public: using TArrayType = arrow::BinaryArray; - static void Validate(arrow::BinaryArray& /*array*/) { + static void Validate(const arrow::BinaryArray& /*array*/) { } - static NYql::NUdf::TUnboxedValue ExtractValue(arrow::BinaryArray& array, const ui32 rowIndex) { + static NYql::NUdf::TUnboxedValue ExtractValue(const arrow::BinaryArray& array, const ui32 rowIndex) { auto data = array.GetView(rowIndex); return MakeString(NUdf::TStringRef(data.data(), data.size())); } @@ -265,10 +267,10 @@ template <> class TElementAccessor { public: using TArrayType = arrow::FixedSizeBinaryArray; - static void Validate(arrow::FixedSizeBinaryArray& /*array*/) { + static void Validate(const arrow::FixedSizeBinaryArray& /*array*/) { } - static NYql::NUdf::TUnboxedValue ExtractValue(arrow::FixedSizeBinaryArray& array, const ui32 rowIndex) { + static NYql::NUdf::TUnboxedValue 
ExtractValue(const arrow::FixedSizeBinaryArray& array, const ui32 rowIndex) { auto data = array.GetView(rowIndex); return MakeString(NUdf::TStringRef(data.data(), data.size() - 1)); } @@ -281,26 +283,52 @@ class TElementAccessor { template TBytesStatistics WriteColumnValuesFromArrowSpecImpl(TAccessor editAccessor, - const TBatchDataAccessor& batch, const ui32 columnIndex, arrow::Array* arrayExt, NScheme::TTypeInfo columnType) { - auto& array = *reinterpret_cast(arrayExt); - TElementAccessor::Validate(array); + const TBatchDataAccessor& batch, const ui32 columnIndex, const std::shared_ptr& chunkedArrayExt, NScheme::TTypeInfo columnType) { auto statAccumulator = TElementAccessor::BuildStatAccumulator(columnType); + auto trivialChunkedArray = std::make_shared(chunkedArrayExt); + NArrow::NAccessor::IChunkedArray::TReader reader(trivialChunkedArray); + + std::optional chunkIdx; + std::optional currentIdxFrom; + std::optional address; + const typename TElementAccessor::TArrayType* currentArray = nullptr; const auto applyToIndex = [&](const ui32 rowIndexFrom, const ui32 rowIndexTo) { + if (!currentIdxFrom) { + address = reader.GetReadChunk(rowIndexFrom); + AFL_ENSURE(rowIndexFrom == 0)("real", rowIndexFrom); + } else { + AFL_ENSURE(rowIndexFrom == *currentIdxFrom + 1)("next", rowIndexFrom)("current", *currentIdxFrom); + if (!address->NextPosition()) { + address = reader.GetReadChunk(rowIndexFrom); + } + } + currentIdxFrom = rowIndexFrom; + + if (!chunkIdx || *chunkIdx != address->GetChunkIdx()) { + currentArray = static_cast(address->GetArray().get()); + TElementAccessor::Validate(*currentArray); + chunkIdx = address->GetChunkIdx(); + } + auto& rowItem = editAccessor(rowIndexTo, columnIndex); - if (array.IsNull(rowIndexFrom)) { + if (currentArray->IsNull(address->GetPosition())) { statAccumulator.AddNull(); rowItem = NUdf::TUnboxedValue(); } else { - rowItem = TElementAccessor::ExtractValue(array, rowIndexFrom); + rowItem = TElementAccessor::ExtractValue(*currentArray, 
address->GetPosition()); statAccumulator.AddValue(rowItem); } }; if (batch.HasDataIndexes()) { ui32 idx = 0; - for (const i64 rowIndex: batch.GetDataIndexes()) { - applyToIndex(rowIndex, idx++); + std::map remapIndexes; + for (const i64 rowIndex : batch.GetDataIndexes()) { + YQL_ENSURE(remapIndexes.emplace(rowIndex, idx++).second); + } + for (auto&& i : remapIndexes) { + applyToIndex(i.first, i.second); } } else { for (i64 rowIndex = 0; rowIndex < batch.GetRecordsCount(); ++rowIndex) { @@ -314,8 +342,7 @@ TBytesStatistics WriteColumnValuesFromArrowSpecImpl(TAccessor editAccessor, template TBytesStatistics WriteColumnValuesFromArrowImpl(TAccessor editAccessor, const TBatchDataAccessor& batch, i64 columnIndex, NScheme::TTypeInfo columnType) { - std::shared_ptr columnSharedPtr = batch.GetBatch()->column(columnIndex); - arrow::Array* columnPtr = columnSharedPtr.get(); + const std::shared_ptr columnPtr = batch.GetBatch()->column(columnIndex); namespace NTypeIds = NScheme::NTypeIds; switch (columnType.GetTypeId()) { case NTypeIds::Bool: @@ -586,35 +613,36 @@ TBytesStatistics TKqpScanComputeContext::TScanData::TRowBatchReader::AddData(con return stats; } -TBytesStatistics TKqpScanComputeContext::TScanData::TBlockBatchReader::AddData(const TBatchDataAccessor& batch, TMaybe /*shardId*/, +TBytesStatistics TKqpScanComputeContext::TScanData::TBlockBatchReader::AddData(const TBatchDataAccessor& dataAccessor, TMaybe /*shardId*/, const THolderFactory& holderFactory) { TBytesStatistics stats; auto totalColsCount = TotalColumnsCount + 1; - TUnboxedValueVector batchValues; - batchValues.resize(totalColsCount); - std::shared_ptr filtered = batch.GetFiltered(); - for (int i = 0; i < filtered->num_columns(); ++i) { - batchValues[i] = holderFactory.CreateArrowBlock(arrow::Datum(filtered->column_data(i))); - } - const ui64 batchByteSize = NYql::NUdf::GetSizeOfArrowBatchInBytes(*filtered); - stats.AddStatistics({batchByteSize, batchByteSize}); - - // !!! TODO !!! 
- // if (!SystemColumns.empty()) { - // for (i64 rowIndex = 0; rowIndex < batch.num_rows(); ++rowIndex) { - // FillSystemColumns(&cells[rowIndex * ColumnsCount() + ResultColumns.size()], shardId, SystemColumns); - // } + auto batches = NArrow::SliceToRecordBatches(dataAccessor.GetFiltered()); + for (auto&& filtered : batches) { + TUnboxedValueVector batchValues; + batchValues.resize(totalColsCount); + for (int i = 0; i < filtered->num_columns(); ++i) { + batchValues[i] = holderFactory.CreateArrowBlock(arrow::Datum(filtered->column(i))); + } + const ui64 batchByteSize = NArrow::GetBatchDataSize(filtered); + stats.AddStatistics({batchByteSize, batchByteSize}); - // stats.AllocatedBytes += batch.num_rows() * SystemColumns.size() * sizeof(NUdf::TUnboxedValue); - // } + // !!! TODO !!! + // if (!SystemColumns.empty()) { + // for (i64 rowIndex = 0; rowIndex < batch.num_rows(); ++rowIndex) { + // FillSystemColumns(&cells[rowIndex * ColumnsCount() + ResultColumns.size()], shardId, SystemColumns); + // } - batchValues[totalColsCount - 1] = holderFactory.CreateArrowBlock(arrow::Datum(std::make_shared(batch.GetRecordsCount()))); - stats.AddStatistics({ sizeof(ui64) * batch.GetRecordsCount(), sizeof(ui64) * batch.GetRecordsCount()}); + // stats.AllocatedBytes += batch.num_rows() * SystemColumns.size() * sizeof(NUdf::TUnboxedValue); + // } - BlockBatches.emplace(TBlockBatch(totalColsCount, batch.GetRecordsCount(), std::move(batchValues), stats.AllocatedBytes)); - StoredBytes += stats.AllocatedBytes; + batchValues[totalColsCount - 1] = holderFactory.CreateArrowBlock(arrow::Datum(std::make_shared(filtered->num_rows()))); + stats.AddStatistics({sizeof(ui64) * filtered->num_rows(), sizeof(ui64) * filtered->num_rows()}); + BlockBatches.emplace(TBlockBatch(totalColsCount, filtered->num_rows(), std::move(batchValues), stats.AllocatedBytes)); + StoredBytes += stats.AllocatedBytes; + } return stats; } diff --git a/ydb/core/kqp/runtime/kqp_scan_data.h 
b/ydb/core/kqp/runtime/kqp_scan_data.h index fc091217c3aa..bb1e3fa44177 100644 --- a/ydb/core/kqp/runtime/kqp_scan_data.h +++ b/ydb/core/kqp/runtime/kqp_scan_data.h @@ -61,15 +61,15 @@ struct TBytesStatistics { class TBatchDataAccessor { private: - YDB_READONLY_DEF(std::shared_ptr, Batch); + YDB_READONLY_DEF(std::shared_ptr, Batch); YDB_READONLY_DEF(std::vector, DataIndexes); - mutable std::shared_ptr FilteredBatch; + mutable std::shared_ptr FilteredBatch; public: - std::shared_ptr GetFiltered() const { + std::shared_ptr GetFiltered() const { if (!FilteredBatch) { if (DataIndexes.size()) { auto permutation = NArrow::MakeFilterPermutation(DataIndexes); - FilteredBatch = NArrow::TStatusValidator::GetValid(arrow::compute::Take(Batch, permutation)).record_batch(); + FilteredBatch = NArrow::TStatusValidator::GetValid(arrow::compute::Take(Batch, permutation)).table(); } else { FilteredBatch = Batch; } @@ -85,7 +85,7 @@ class TBatchDataAccessor { return DataIndexes.size() ? DataIndexes.size() : Batch->num_rows(); } - TBatchDataAccessor(const std::shared_ptr& batch, std::vector&& dataIndexes) + TBatchDataAccessor(const std::shared_ptr& batch, std::vector&& dataIndexes) : Batch(batch) , DataIndexes(std::move(dataIndexes)) { @@ -93,12 +93,19 @@ class TBatchDataAccessor { AFL_VERIFY(Batch->num_rows()); } - TBatchDataAccessor(const std::shared_ptr& batch) + TBatchDataAccessor(const std::shared_ptr& batch) : Batch(batch) { AFL_VERIFY(Batch); AFL_VERIFY(Batch->num_rows()); } + + TBatchDataAccessor(const std::shared_ptr& batch) + : Batch(NArrow::TStatusValidator::GetValid(arrow::Table::FromRecordBatches({batch}))) { + AFL_VERIFY(Batch); + AFL_VERIFY(Batch->num_rows()); + + } }; TBytesStatistics GetUnboxedValueSize(const NUdf::TUnboxedValue& value, const NScheme::TTypeInfo& type); diff --git a/ydb/core/kqp/runtime/kqp_write_actor.cpp b/ydb/core/kqp/runtime/kqp_write_actor.cpp index c6217022aeeb..6d375d38eef3 100644 --- a/ydb/core/kqp/runtime/kqp_write_actor.cpp +++ 
b/ydb/core/kqp/runtime/kqp_write_actor.cpp @@ -334,7 +334,7 @@ class TKqpWriteActor : public TActorBootstrapped, public NYql::N auto evWrite = std::make_unique( inFlightBatch.TxId, NKikimrDataEvents::TEvWrite::MODE_PREPARE); - ui64 payloadIndex = NKikimr::NEvWrite::TPayloadHelper(*evWrite) + ui64 payloadIndex = NKikimr::NEvWrite::TPayloadWriter(*evWrite) .AddDataToPayload(TString(inFlightBatch.Data)); evWrite->AddOperation( diff --git a/ydb/core/kqp/ut/common/columnshard.cpp b/ydb/core/kqp/ut/common/columnshard.cpp index e4e8132a5d05..9ff87240e3fa 100644 --- a/ydb/core/kqp/ut/common/columnshard.cpp +++ b/ydb/core/kqp/ut/common/columnshard.cpp @@ -3,6 +3,27 @@ namespace NKikimr { namespace NKqp { + + TString GetConfigProtoWithName(const TString & tierName) { + return TStringBuilder() << "Name : \"" << tierName << "\"\n" << + R"( + ObjectStorage : { + Endpoint: "fake" + Bucket: "fake" + SecretableAccessKey: { + Value: { + Data: "secretAccessKey" + } + } + SecretableSecretKey: { + Value: { + Data: "secretSecretKey" + } + } + } + )"; + } + using namespace NYdb; TTestHelper::TTestHelper(const TKikimrSettings& settings) @@ -23,9 +44,46 @@ namespace NKqp { return Session; } - void TTestHelper::CreateTable(const TColumnTableBase& table) { + void TTestHelper::CreateTable(const TColumnTableBase& table, const EStatus expectedStatus) { std::cerr << (table.BuildQuery()) << std::endl; auto result = Session.ExecuteSchemeQuery(table.BuildQuery()).GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), expectedStatus, result.GetIssues().ToString()); + } + + void TTestHelper::CreateTier(const TString& tierName) { + auto result = Session.ExecuteSchemeQuery("CREATE OBJECT " + tierName + " (TYPE TIER) WITH tierConfig = `" + GetConfigProtoWithName(tierName) + "`").GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + } + + TString TTestHelper::CreateTieringRule(const TString& tierName, const TString& columnName) { + 
const TString ruleName = tierName + "_" + columnName; + const TString configTieringStr = TStringBuilder() << R"({ + "rules" : [ + { + "tierName" : ")" << tierName << R"(", + "durationForEvict" : "10d" + } + ] + })"; + auto result = Session.ExecuteSchemeQuery("CREATE OBJECT IF NOT EXISTS " + ruleName + " (TYPE TIERING_RULE) WITH (defaultColumn = " + columnName + ", description = `" + configTieringStr + "`)").GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + return ruleName; + } + + void TTestHelper::SetTiering(const TString& tableName, const TString& ruleName) { + auto alterQuery = TStringBuilder() << "ALTER TABLE `" << tableName << "` SET (TIERING = '" << ruleName << "')"; + auto result = Session.ExecuteSchemeQuery(alterQuery).GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + } + + void TTestHelper::ResetTiering(const TString& tableName) { + auto alterQuery = TStringBuilder() << "ALTER TABLE `" << tableName << "` RESET (TIERING)"; + auto result = Session.ExecuteSchemeQuery(alterQuery).GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); + } + + void TTestHelper::DropTable(const TString& tableName) { + auto result = Session.DropTable(tableName).GetValueSync(); UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), EStatus::SUCCESS, result.GetIssues().ToString()); } @@ -62,7 +120,8 @@ namespace NKqp { } } for (auto shard : shards) { - RebootTablet(*runtime, shard, sender); + Kikimr.GetTestServer().GetRuntime()->Send(MakePipePeNodeCacheID(false), NActors::TActorId(), new TEvPipeCache::TEvForward( + new TEvents::TEvPoisonPill(), shard, false)); } } diff --git a/ydb/core/kqp/ut/common/columnshard.h b/ydb/core/kqp/ut/common/columnshard.h index adc2ea16989a..2ac738be7cd7 100644 --- a/ydb/core/kqp/ut/common/columnshard.h +++ b/ydb/core/kqp/ut/common/columnshard.h @@ -70,7 +70,12 @@ 
namespace NKqp { TKikimrRunner& GetKikimr(); TTestActorRuntime& GetRuntime(); NYdb::NTable::TSession& GetSession(); - void CreateTable(const TColumnTableBase& table); + void CreateTable(const TColumnTableBase& table, const NYdb::EStatus expectedStatus = NYdb::EStatus::SUCCESS); + void DropTable(const TString& tableName); + void CreateTier(const TString& tierName); + TString CreateTieringRule(const TString& tierName, const TString& columnName); + void SetTiering(const TString& tableName, const TString& ruleName); + void ResetTiering(const TString& tableName); void BulkUpsert(const TColumnTable& table, TTestHelper::TUpdatesBuilder& updates, const Ydb::StatusIds_StatusCode& opStatus = Ydb::StatusIds::SUCCESS); void BulkUpsert(const TColumnTable& table, std::shared_ptr batch, const Ydb::StatusIds_StatusCode& opStatus = Ydb::StatusIds::SUCCESS); void ReadData(const TString& query, const TString& expected, const NYdb::EStatus opStatus = NYdb::EStatus::SUCCESS); diff --git a/ydb/core/kqp/ut/common/kqp_ut_common.cpp b/ydb/core/kqp/ut/common/kqp_ut_common.cpp index adea3a31a098..46abedf3101f 100644 --- a/ydb/core/kqp/ut/common/kqp_ut_common.cpp +++ b/ydb/core/kqp/ut/common/kqp_ut_common.cpp @@ -1305,6 +1305,18 @@ TVector GetTableShards(Tests::TServer* server, return shards; } +TVector GetColumnTableShards(Tests::TServer* server, + TActorId sender, + const TString &path) +{ + TVector shards; + auto lsResult = DescribeTable(server, sender, path); + for (auto &part : lsResult.GetPathDescription().GetColumnTableDescription().GetSharding().GetColumnShards()) + shards.push_back(part); + + return shards; +} + TVector GetTableShards(Tests::TServer::TPtr server, TActorId sender, const TString &path) { diff --git a/ydb/core/kqp/ut/common/kqp_ut_common.h b/ydb/core/kqp/ut/common/kqp_ut_common.h index 10b01f158175..8af2defd13ad 100644 --- a/ydb/core/kqp/ut/common/kqp_ut_common.h +++ b/ydb/core/kqp/ut/common/kqp_ut_common.h @@ -325,6 +325,7 @@ NKikimrScheme::TEvDescribeSchemeResult 
DescribeTable(Tests::TServer* server, TAc TVector GetTableShards(Tests::TServer* server, TActorId sender, const TString &path); TVector GetTableShards(Tests::TServer::TPtr server, TActorId sender, const TString &path); +TVector GetColumnTableShards(Tests::TServer* server, TActorId sender, const TString &path); void WaitForZeroSessions(const NKqp::TKqpCounters& counters); diff --git a/ydb/core/kqp/ut/olap/aggregations_ut.cpp b/ydb/core/kqp/ut/olap/aggregations_ut.cpp new file mode 100644 index 000000000000..841a04f9ca62 --- /dev/null +++ b/ydb/core/kqp/ut/olap/aggregations_ut.cpp @@ -0,0 +1,1354 @@ +#include + +#include "helpers/aggregation.h" + +#include +#include +#include + +#include +#include + +namespace NKikimr::NKqp { + +Y_UNIT_TEST_SUITE(KqpOlapAggregations) { + Y_UNIT_TEST(Aggregation) { + auto settings = TKikimrSettings() + .SetWithSampleTables(false); + TKikimrRunner kikimr(settings); + + TLocalHelper(kikimr).CreateTestOlapTable(); + + auto tableClient = kikimr.GetTableClient(); + + { + auto it = tableClient.StreamExecuteScanQuery(R"( + --!syntax_v1 + + SELECT + COUNT(*) + FROM `/Root/olapStore/olapTable` + )").GetValueSync(); + + UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); + TString result = StreamResultToYson(it); + Cout << result << Endl; + CompareYson(result, R"([[0u;]])"); + } + + { + WriteTestData(kikimr, "/Root/olapStore/olapTable", 10000, 3000000, 1000); + WriteTestData(kikimr, "/Root/olapStore/olapTable", 11000, 3001000, 1000); + WriteTestData(kikimr, "/Root/olapStore/olapTable", 12000, 3002000, 1000); + WriteTestData(kikimr, "/Root/olapStore/olapTable", 13000, 3003000, 1000); + WriteTestData(kikimr, "/Root/olapStore/olapTable", 14000, 3004000, 1000); + WriteTestData(kikimr, "/Root/olapStore/olapTable", 20000, 2000000, 7000); + WriteTestData(kikimr, "/Root/olapStore/olapTable", 30000, 1000000, 11000); + } + + { + auto it = tableClient.StreamExecuteScanQuery(R"( + --!syntax_v1 + + SELECT + COUNT(*), MAX(`resource_id`), 
MAX(`timestamp`), MIN(LENGTH(`message`)) + FROM `/Root/olapStore/olapTable` + )").GetValueSync(); + + UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); + TString result = StreamResultToYson(it); + Cout << result << Endl; + CompareYson(result, R"([[23000u;["40999"];[3004999u];[1036u]]])"); + } + + { + auto it = tableClient.StreamExecuteScanQuery(R"( + --!syntax_v1 + + SELECT + COUNT(*) + FROM `/Root/olapStore/olapTable` + )").GetValueSync(); + + UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); + TString result = StreamResultToYson(it); + Cout << result << Endl; + CompareYson(result, R"([[23000u;]])"); + } + } + + Y_UNIT_TEST(AggregationCountPushdown) { + auto settings = TKikimrSettings() + .SetWithSampleTables(false) + .SetForceColumnTablesCompositeMarks(true); + TKikimrRunner kikimr(settings); + + TLocalHelper(kikimr).CreateTestOlapTable(); + auto tableClient = kikimr.GetTableClient(); + auto csController = NYDBTest::TControllers::RegisterCSControllerGuard(); + + { + WriteTestData(kikimr, "/Root/olapStore/olapTable", 10000, 3000000, 1000); + WriteTestData(kikimr, "/Root/olapStore/olapTable", 11000, 3001000, 1000); + WriteTestData(kikimr, "/Root/olapStore/olapTable", 12000, 3002000, 1000); + WriteTestData(kikimr, "/Root/olapStore/olapTable", 13000, 3003000, 1000); + WriteTestData(kikimr, "/Root/olapStore/olapTable", 14000, 3004000, 1000); + WriteTestData(kikimr, "/Root/olapStore/olapTable", 20000, 2000000, 7000); + WriteTestData(kikimr, "/Root/olapStore/olapTable", 30000, 1000000, 11000); + } + while (csController->GetInsertFinishedCounter().Val() == 0) { + Cout << "Wait indexation..." 
<< Endl; + Sleep(TDuration::Seconds(2)); + } + AFL_VERIFY(Singleton()->GetSize()); + + { + TString query = R"( + --!syntax_v1 + SELECT + COUNT(level) + FROM `/Root/olapStore/olapTable` + )"; + auto opStartTime = Now(); + auto it = tableClient.StreamExecuteScanQuery(query).GetValueSync(); + + UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); + TString result = StreamResultToYson(it); + Cerr << "!!!\nPushdown query execution time: " << (Now() - opStartTime).MilliSeconds() << "\n!!!\n"; + Cout << result << Endl; + CompareYson(result, R"([[23000u;]])"); + + // Check plan +#if SSA_RUNTIME_VERSION >= 2U + CheckPlanForAggregatePushdown(query, tableClient, { "TKqpOlapAgg" }, "TableFullScan"); +#else + CheckPlanForAggregatePushdown(query, tableClient, { "CombineCore" }, ""); +#endif + } + } + + Y_UNIT_TEST(AggregationCountGroupByPushdown) { + auto settings = TKikimrSettings() + .SetWithSampleTables(false) + .SetForceColumnTablesCompositeMarks(true); + TKikimrRunner kikimr(settings); + + TLocalHelper(kikimr).CreateTestOlapTable(); + auto tableClient = kikimr.GetTableClient(); + + { + WriteTestData(kikimr, "/Root/olapStore/olapTable", 10000, 3000000, 1000); + WriteTestData(kikimr, "/Root/olapStore/olapTable", 11000, 3001000, 1000); + WriteTestData(kikimr, "/Root/olapStore/olapTable", 12000, 3002000, 1000); + WriteTestData(kikimr, "/Root/olapStore/olapTable", 13000, 3003000, 1000); + WriteTestData(kikimr, "/Root/olapStore/olapTable", 14000, 3004000, 1000); + WriteTestData(kikimr, "/Root/olapStore/olapTable", 20000, 2000000, 7000); + WriteTestData(kikimr, "/Root/olapStore/olapTable", 30000, 1000000, 11000); + } + + { + TString query = R"( + --!syntax_v1 + PRAGMA Kikimr.OptUseFinalizeByKey; + SELECT + level, COUNT(level) + FROM `/Root/olapStore/olapTable` + GROUP BY level + ORDER BY level + )"; + auto it = tableClient.StreamExecuteScanQuery(query).GetValueSync(); + + UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); + TString result = StreamResultToYson(it); + 
Cout << result << Endl; + CompareYson(result, R"([[[0];4600u];[[1];4600u];[[2];4600u];[[3];4600u];[[4];4600u]])"); + + // Check plan +#if SSA_RUNTIME_VERSION >= 2U + CheckPlanForAggregatePushdown(query, tableClient, { "WideCombiner" }, "Aggregate-TableFullScan"); +// CheckPlanForAggregatePushdown(query, tableClient, { "TKqpOlapAgg" }, "TableFullScan"); +#else + CheckPlanForAggregatePushdown(query, tableClient, { "CombineCore" }, ""); +#endif + } + } + + Y_UNIT_TEST_TWIN(CountAllPushdown, UseLlvm) { + auto settings = TKikimrSettings() + .SetWithSampleTables(false) + .SetForceColumnTablesCompositeMarks(true); + TKikimrRunner kikimr(settings); + + TLocalHelper(kikimr).CreateTestOlapTable(); + auto tableClient = kikimr.GetTableClient(); + + { + WriteTestData(kikimr, "/Root/olapStore/olapTable", 10000, 3000000, 1000); + WriteTestData(kikimr, "/Root/olapStore/olapTable", 11000, 3001000, 1000); + WriteTestData(kikimr, "/Root/olapStore/olapTable", 12000, 3002000, 1000); + WriteTestData(kikimr, "/Root/olapStore/olapTable", 13000, 3003000, 1000); + WriteTestData(kikimr, "/Root/olapStore/olapTable", 14000, 3004000, 1000); + WriteTestData(kikimr, "/Root/olapStore/olapTable", 20000, 2000000, 7000); + WriteTestData(kikimr, "/Root/olapStore/olapTable", 30000, 1000000, 11000); + } + + { + TString query = fmt::format(R"( + --!syntax_v1 + PRAGMA ydb.UseLlvm = "{}"; + + SELECT + COUNT(*) + FROM `/Root/olapStore/olapTable` + )", UseLlvm ? 
"true" : "false"); + auto it = tableClient.StreamExecuteScanQuery(query).GetValueSync(); + + UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); + TString result = StreamResultToYson(it); + Cout << result << Endl; + CompareYson(result, R"([[23000u;]])"); + + // Check plan +#if SSA_RUNTIME_VERSION >= 2U + CheckPlanForAggregatePushdown(query, tableClient, { "TKqpOlapAgg" }, "TableFullScan"); +#else + CheckPlanForAggregatePushdown(query, tableClient, { "Condense" }, ""); +#endif + } + } + + Y_UNIT_TEST_TWIN(CountAllPushdownBackwardCompatibility, EnableLlvm) { + auto settings = TKikimrSettings() + .SetWithSampleTables(false) + .SetForceColumnTablesCompositeMarks(true); + TKikimrRunner kikimr(settings); + + TLocalHelper(kikimr).CreateTestOlapTable(); + auto tableClient = kikimr.GetTableClient(); + + { + WriteTestData(kikimr, "/Root/olapStore/olapTable", 10000, 3000000, 1000); + WriteTestData(kikimr, "/Root/olapStore/olapTable", 11000, 3001000, 1000); + WriteTestData(kikimr, "/Root/olapStore/olapTable", 12000, 3002000, 1000); + WriteTestData(kikimr, "/Root/olapStore/olapTable", 13000, 3003000, 1000); + WriteTestData(kikimr, "/Root/olapStore/olapTable", 14000, 3004000, 1000); + WriteTestData(kikimr, "/Root/olapStore/olapTable", 20000, 2000000, 7000); + WriteTestData(kikimr, "/Root/olapStore/olapTable", 30000, 1000000, 11000); + } + + { + TString query = fmt::format(R"( + --!syntax_v1 + PRAGMA Kikimr.EnableLlvm = "{}"; + + SELECT + COUNT(*) + FROM `/Root/olapStore/olapTable` + )", EnableLlvm ? 
"true" : "false"); + auto it = tableClient.StreamExecuteScanQuery(query).GetValueSync(); + + UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); + TString result = StreamResultToYson(it); + Cout << result << Endl; + CompareYson(result, R"([[23000u;]])"); + + // Check plan +#if SSA_RUNTIME_VERSION >= 2U + CheckPlanForAggregatePushdown(query, tableClient, { "TKqpOlapAgg" }, "TableFullScan"); +#else + CheckPlanForAggregatePushdown(query, tableClient, { "Condense" }, ""); +#endif + } + } + + Y_UNIT_TEST(CountAllNoPushdown) { + auto settings = TKikimrSettings() + .SetWithSampleTables(false) + .SetForceColumnTablesCompositeMarks(true); + TKikimrRunner kikimr(settings); + + TLocalHelper(kikimr).CreateTestOlapTable(); + auto tableClient = kikimr.GetTableClient(); + + { + WriteTestData(kikimr, "/Root/olapStore/olapTable", 10000, 3000000, 1000); + WriteTestData(kikimr, "/Root/olapStore/olapTable", 11000, 3001000, 1000); + WriteTestData(kikimr, "/Root/olapStore/olapTable", 12000, 3002000, 1000); + WriteTestData(kikimr, "/Root/olapStore/olapTable", 13000, 3003000, 1000); + WriteTestData(kikimr, "/Root/olapStore/olapTable", 14000, 3004000, 1000); + WriteTestData(kikimr, "/Root/olapStore/olapTable", 20000, 2000000, 7000); + WriteTestData(kikimr, "/Root/olapStore/olapTable", 30000, 1000000, 11000); + } + + { + auto it = tableClient.StreamExecuteScanQuery(R"( + --!syntax_v1 + SELECT + COUNT(*) + FROM `/Root/olapStore/olapTable` + )").GetValueSync(); + + UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); + TString result = StreamResultToYson(it); + Cout << result << Endl; + CompareYson(result, R"([[23000u;]])"); + } + } + + Y_UNIT_TEST(Filter_NotAllUsedFieldsInResultSet) { + TAggregationTestCase testCase; + testCase.SetQuery(R"( + SELECT id, resource_id FROM `/Root/tableWithNulls` + WHERE + level = 5; + )") + .SetExpectedReply("[[5;#]]") + .AddExpectedPlanOptions("KqpOlapFilter"); + + TestTableWithNulls({ testCase }); + } + + 
Y_UNIT_TEST(Aggregation_ResultDistinctCountRI_GroupByL) { + TAggregationTestCase testCase; + testCase.SetQuery(R"( + SELECT + level, COUNT(DISTINCT resource_id) + FROM `/Root/olapStore/olapTable` + GROUP BY level + ORDER BY level + )") + .SetExpectedReply("[[[0];4600u];[[1];4600u];[[2];4600u];[[3];4600u];[[4];4600u]]") + ; + testCase.FillExpectedAggregationGroupByPlanOptions(); + + TestAggregations({ testCase }); + } + + Y_UNIT_TEST(Aggregation_ResultCountAll_FilterL) { + TAggregationTestCase testCase; + testCase.SetQuery(R"( + SELECT + COUNT(*) + FROM `/Root/olapStore/olapTable` + WHERE level = 2 + )") + .SetExpectedReply("[[4600u;]]") + .AddExpectedPlanOptions("KqpOlapFilter") +#if SSA_RUNTIME_VERSION >= 2U + .AddExpectedPlanOptions("TKqpOlapAgg") + .MutableLimitChecker().SetExpectedResultCount(1) +#else + .AddExpectedPlanOptions("Condense") +#endif + ; + + TestAggregations({ testCase }); + } + + Y_UNIT_TEST(Aggregation_ResultCountL_FilterL) { + TAggregationTestCase testCase; + testCase.SetQuery(R"( + SELECT + COUNT(level) + FROM `/Root/olapStore/olapTable` + WHERE level = 2 + )") + .SetExpectedReply("[[4600u;]]") + .AddExpectedPlanOptions("KqpOlapFilter") +#if SSA_RUNTIME_VERSION >= 2U + .AddExpectedPlanOptions("TKqpOlapAgg") + .MutableLimitChecker().SetExpectedResultCount(1) +#else + .AddExpectedPlanOptions("CombineCore") +#endif + ; + + TestAggregations({ testCase }); + } + + Y_UNIT_TEST(Aggregation_ResultCountT_FilterL) { + TAggregationTestCase testCase; + testCase.SetQuery(R"( + SELECT + COUNT(timestamp) + FROM `/Root/olapStore/olapTable` + WHERE level = 2 + )") + .SetExpectedReply("[[4600u;]]") + .AddExpectedPlanOptions("KqpOlapFilter") +#if SSA_RUNTIME_VERSION >= 2U + .AddExpectedPlanOptions("TKqpOlapAgg") + .MutableLimitChecker().SetExpectedResultCount(1) +#else + .AddExpectedPlanOptions("CombineCore") + .AddExpectedPlanOptions("KqpOlapFilter") +#endif + ; + + TestAggregations({ testCase }); + } + + Y_UNIT_TEST(Aggregation_ResultTL_FilterL_Limit2) { + 
TAggregationTestCase testCase; + testCase.SetQuery(R"( + SELECT + timestamp, level + FROM `/Root/olapStore/olapTable` + WHERE level = 2 + LIMIT 2 + )") + .AddExpectedPlanOptions("KqpOlapFilter") + .MutableLimitChecker().SetExpectedLimit(2); + TestAggregations({ testCase }); + } + + Y_UNIT_TEST(Aggregation_ResultTL_FilterL_OrderT_Limit2) { + TAggregationTestCase testCase; + testCase.SetQuery(R"( + SELECT + timestamp, level + FROM `/Root/olapStore/olapTable` + WHERE level = 2 + ORDER BY timestamp + LIMIT 2 + )") + .AddExpectedPlanOptions("KqpOlapFilter") + .MutableLimitChecker().SetExpectedLimit(2); + + TestAggregations({ testCase }); + } + + Y_UNIT_TEST(Aggregation_ResultT_FilterL_Limit2) { + TAggregationTestCase testCase; + testCase.SetQuery(R"( + SELECT + timestamp + FROM `/Root/olapStore/olapTable` + WHERE level = 2 + LIMIT 2 + )") + .AddExpectedPlanOptions("KqpOlapFilter") + .AddExpectedPlanOptions("KqpOlapExtractMembers") + .MutableLimitChecker().SetExpectedLimit(2); + + TestAggregations({ testCase }); + } + + Y_UNIT_TEST(Aggregation_ResultT_FilterL_OrderT_Limit2) { + TAggregationTestCase testCase; + testCase.SetQuery(R"( + SELECT + timestamp + FROM `/Root/olapStore/olapTable` + WHERE level = 2 + ORDER BY timestamp + LIMIT 2 + )") + .AddExpectedPlanOptions("KqpOlapFilter") + .AddExpectedPlanOptions("KqpOlapExtractMembers") + .MutableLimitChecker().SetExpectedLimit(2); + + TestAggregations({ testCase }); + } + + Y_UNIT_TEST(Aggregation_ResultL_FilterL_OrderL_Limit2) { + TAggregationTestCase testCase; + testCase.SetQuery(R"( + SELECT + timestamp, level + FROM `/Root/olapStore/olapTable` + WHERE level > 1 + ORDER BY level + LIMIT 2 + )") + .AddExpectedPlanOptions("KqpOlapFilter"); + + TestAggregations({ testCase }); + } + + Y_UNIT_TEST(Aggregation_ResultCountExpr) { + auto g = NColumnShard::TLimits::MaxBlobSizeGuard(10000); + TAggregationTestCase testCase; + testCase.SetQuery(R"( + SELECT + COUNT(level + 2) + FROM `/Root/olapStore/olapTable` + )") + 
.SetExpectedReply("[[23000u;]]") + .AddExpectedPlanOptions("Condense1"); + + TestAggregations({ testCase }); + } + + Y_UNIT_TEST(Aggregation_Count_Null) { + TAggregationTestCase testCase; + testCase.SetQuery(R"( + SELECT + COUNT(level) + FROM `/Root/tableWithNulls` + WHERE id > 5; + )") + .SetExpectedReply("[[0u]]") +#if SSA_RUNTIME_VERSION >= 2U + .AddExpectedPlanOptions("TKqpOlapAgg"); +#else + .AddExpectedPlanOptions("CombineCore"); +#endif + + TestTableWithNulls({ testCase }); + } + + Y_UNIT_TEST(Aggregation_Count_NullMix) { + TAggregationTestCase testCase; + testCase.SetQuery(R"( + SELECT + COUNT(level) + FROM `/Root/tableWithNulls`; + )") + .SetExpectedReply("[[5u]]") +#if SSA_RUNTIME_VERSION >= 2U + .AddExpectedPlanOptions("TKqpOlapAgg"); +#else + .AddExpectedPlanOptions("CombineCore"); +#endif + + TestTableWithNulls({ testCase }); + } + + Y_UNIT_TEST(Aggregation_Count_GroupBy) { + TAggregationTestCase testCase; + testCase.SetQuery(R"( + SELECT + id, COUNT(level) + FROM `/Root/tableWithNulls` + WHERE id BETWEEN 4 AND 5 + GROUP BY id + ORDER BY id; + )") + .SetExpectedReply("[[4;1u];[5;1u]]"); + testCase.FillExpectedAggregationGroupByPlanOptions(); + + TestTableWithNulls({ testCase }); + } + + Y_UNIT_TEST(Aggregation_Count_NullGroupBy) { + TAggregationTestCase testCase; + testCase.SetQuery(R"( + SELECT + id, COUNT(level) + FROM `/Root/tableWithNulls` + WHERE id BETWEEN 6 AND 7 + GROUP BY id + ORDER BY id; + )") + .SetExpectedReply("[[6;0u];[7;0u]]"); + testCase.FillExpectedAggregationGroupByPlanOptions(); + + TestTableWithNulls({ testCase }); + } + + Y_UNIT_TEST(Aggregation_Count_NullMixGroupBy) { + TAggregationTestCase testCase; + testCase.SetQuery(R"( + SELECT + id, COUNT(level) + FROM `/Root/tableWithNulls` + WHERE id > 4 AND id < 7 + GROUP BY id + ORDER BY id; + )") + .SetExpectedReply("[[5;1u];[6;0u]]"); + testCase.FillExpectedAggregationGroupByPlanOptions(); + + TestTableWithNulls({ testCase }); + } + + Y_UNIT_TEST(Aggregation_Count_GroupByNull) { + // 
Wait for KIKIMR-16940 fix + return; + TAggregationTestCase testCase; + testCase.SetQuery(R"( + SELECT + level, COUNT(id), COUNT(level), COUNT(*) + FROM `/Root/tableWithNulls` + WHERE id > 5 + GROUP BY level + ORDER BY level; + )") + .SetExpectedReply("[[#;5u;0u;5u]]"); + testCase.FillExpectedAggregationGroupByPlanOptions(); + + TestTableWithNulls({ testCase }); + } + + Y_UNIT_TEST(Aggregation_Count_GroupByNullMix) { + // Wait for KIKIMR-16940 fix + return; + TAggregationTestCase testCase; + testCase.SetQuery(R"( + SELECT + level, COUNT(id), COUNT(level), COUNT(*) + FROM `/Root/tableWithNulls` + WHERE id >= 5 + GROUP BY level + ORDER BY level; + )") + .SetExpectedReply("[[#;5u;0u;5u];[[5];1u;1u;1u]]"); + testCase.FillExpectedAggregationGroupByPlanOptions(); + + TestTableWithNulls({ testCase }); + } + + Y_UNIT_TEST(Aggregation_NoPushdownOnDisabledEmitAggApply) { + TAggregationTestCase testCase; + testCase.SetQuery(R"( + PRAGMA DisableEmitAggApply; + SELECT + COUNT(level) + FROM `/Root/olapStore/olapTable` + )") + .SetExpectedReply("[[23000u;]]") + .AddExpectedPlanOptions("Condense1"); + + TestAggregations({ testCase }); + } + + Y_UNIT_TEST(AggregationAndFilterPushdownOnDiffCols) { + TAggregationTestCase testCase; + testCase.SetQuery(R"( + SELECT + COUNT(`timestamp`) + FROM `/Root/olapStore/olapTable` + WHERE level = 2 + )") + .SetExpectedReply("[[4600u;]]") +#if SSA_RUNTIME_VERSION >= 2U + .AddExpectedPlanOptions("TKqpOlapAgg") +#else + .AddExpectedPlanOptions("CombineCore") +#endif + .AddExpectedPlanOptions("KqpOlapFilter"); + + TestAggregations({ testCase }); + } + + Y_UNIT_TEST(Aggregation_Avg) { + TAggregationTestCase testCase; + testCase.SetQuery(R"( + SELECT + AVG(level), MIN(level) + FROM `/Root/olapStore/olapTable` + )") + .SetExpectedReply("[[[2.];[0]]]") +#if SSA_RUNTIME_VERSION >= 2U + .AddExpectedPlanOptions("TKqpOlapAgg"); +#else + .AddExpectedPlanOptions("CombineCore"); +#endif + + TestAggregations({ testCase }); + } + + 
Y_UNIT_TEST(Aggregation_Avg_Null) { + TAggregationTestCase testCase; + testCase.SetQuery(R"( + SELECT + AVG(level) + FROM `/Root/tableWithNulls` + WHERE id > 5; + )") + .SetExpectedReply("[[#]]") +#if SSA_RUNTIME_VERSION >= 2U + .AddExpectedPlanOptions("TKqpOlapAgg"); +#else + .AddExpectedPlanOptions("CombineCore"); +#endif + + TestTableWithNulls({ testCase }); + } + + Y_UNIT_TEST(Aggregation_Avg_NullMix) { + TAggregationTestCase testCase; + testCase.SetQuery(R"( + SELECT + AVG(level) + FROM `/Root/tableWithNulls`; + )") + .SetExpectedReply("[[[3.]]]") +#if SSA_RUNTIME_VERSION >= 2U + .AddExpectedPlanOptions("TKqpOlapAgg"); +#else + .AddExpectedPlanOptions("CombineCore"); +#endif + + TestTableWithNulls({ testCase }); + } + + Y_UNIT_TEST(Aggregation_Avg_GroupBy) { + TAggregationTestCase testCase; + testCase.SetQuery(R"( + SELECT + id, AVG(level) + FROM `/Root/tableWithNulls` + WHERE id BETWEEN 4 AND 5 + GROUP BY id + ORDER BY id; + )") + .SetExpectedReply("[[4;[4.]];[5;[5.]]]"); + testCase.FillExpectedAggregationGroupByPlanOptions(); + + TestTableWithNulls({ testCase }); + } + + Y_UNIT_TEST(Aggregation_Avg_NullGroupBy) { + TAggregationTestCase testCase; + testCase.SetQuery(R"( + SELECT + id, AVG(level) + FROM `/Root/tableWithNulls` + WHERE id BETWEEN 6 AND 7 + GROUP BY id + ORDER BY id; + )") + .SetExpectedReply("[[6;#];[7;#]]"); + testCase.FillExpectedAggregationGroupByPlanOptions(); + + TestTableWithNulls({ testCase }); + } + + Y_UNIT_TEST(Aggregation_Avg_NullMixGroupBy) { + TAggregationTestCase testCase; + testCase.SetQuery(R"( + SELECT + id, AVG(level) + FROM `/Root/tableWithNulls` + WHERE id > 4 AND id < 7 + GROUP BY id + ORDER BY id; + )") + .SetExpectedReply("[[5;[5.]];[6;#]]"); + testCase.FillExpectedAggregationGroupByPlanOptions(); + + TestTableWithNulls({ testCase }); + } + + Y_UNIT_TEST(Aggregation_Avg_GroupByNull) { + TAggregationTestCase testCase; + testCase.SetQuery(R"( + SELECT + level, AVG(id), AVG(level) + FROM `/Root/tableWithNulls` + WHERE id > 5 
+ GROUP BY level + ORDER BY level; + )") + .SetExpectedReply("[[#;8.;#]]"); + testCase.FillExpectedAggregationGroupByPlanOptions(); + + TestTableWithNulls({ testCase }); + } + + Y_UNIT_TEST(Aggregation_Avg_GroupByNullMix) { + TAggregationTestCase testCase; + testCase.SetQuery(R"( + SELECT + level, AVG(id), AVG(level) + FROM `/Root/tableWithNulls` + WHERE id >= 5 + GROUP BY level + ORDER BY level; + )") + .SetExpectedReply("[[#;8.;#];[[5];5.;[5.]]]"); + testCase.FillExpectedAggregationGroupByPlanOptions(); + + TestTableWithNulls({ testCase }); + } + + Y_UNIT_TEST(Aggregation_Sum) { + TAggregationTestCase testCase; + testCase.SetQuery(R"( + SELECT + SUM(level) + FROM `/Root/olapStore/olapTable` + )") + .SetExpectedReply("[[[46000;]]]") +#if SSA_RUNTIME_VERSION >= 2U + .AddExpectedPlanOptions("TKqpOlapAgg"); +#else + .AddExpectedPlanOptions("CombineCore"); +#endif + + TestAggregations({ testCase }); + } + + Y_UNIT_TEST(Aggregation_Sum_Null) { + TAggregationTestCase testCase; + testCase.SetQuery(R"( + SELECT + SUM(level) + FROM `/Root/tableWithNulls` + WHERE id > 5; + )") + .SetExpectedReply("[[#]]") +#if SSA_RUNTIME_VERSION >= 2U + .AddExpectedPlanOptions("TKqpOlapAgg"); +#else + .AddExpectedPlanOptions("CombineCore"); +#endif + + TestTableWithNulls({ testCase }); + } + + Y_UNIT_TEST(Aggregation_Sum_NullMix) { + TAggregationTestCase testCase; + testCase.SetQuery(R"( + SELECT + SUM(level) + FROM `/Root/tableWithNulls`; + )") + .SetExpectedReply("[[[15]]]") +#if SSA_RUNTIME_VERSION >= 2U + .AddExpectedPlanOptions("TKqpOlapAgg"); +#else + .AddExpectedPlanOptions("CombineCore"); +#endif + + TestTableWithNulls({ testCase }); + } + + Y_UNIT_TEST(Aggregation_Sum_GroupBy) { + TAggregationTestCase testCase; + testCase.SetQuery(R"( + SELECT + id, SUM(level) + FROM `/Root/tableWithNulls` + WHERE id BETWEEN 4 AND 5 + GROUP BY id + ORDER BY id; + )") + .SetExpectedReply("[[4;[4]];[5;[5]]]"); + testCase.FillExpectedAggregationGroupByPlanOptions(); + + TestTableWithNulls({ testCase 
}); + } + + Y_UNIT_TEST(Aggregation_Sum_NullGroupBy) { + TAggregationTestCase testCase; + testCase.SetQuery(R"( + SELECT + id, SUM(level) + FROM `/Root/tableWithNulls` + WHERE id BETWEEN 6 AND 7 + GROUP BY id + ORDER BY id; + )") + .SetExpectedReply("[[6;#];[7;#]]"); + testCase.FillExpectedAggregationGroupByPlanOptions(); + + TestTableWithNulls({ testCase }); + } + + Y_UNIT_TEST(Aggregation_Sum_NullMixGroupBy) { + TAggregationTestCase testCase; + testCase.SetQuery(R"( + SELECT + id, SUM(level) + FROM `/Root/tableWithNulls` + WHERE id > 4 AND id < 7 + GROUP BY id + ORDER BY id; + )") + .SetExpectedReply("[[5;[5]];[6;#]]"); + testCase.FillExpectedAggregationGroupByPlanOptions(); + + TestTableWithNulls({ testCase }); + } + + Y_UNIT_TEST(Aggregation_Sum_GroupByNull) { + TAggregationTestCase testCase; + testCase.SetQuery(R"( + SELECT + level, SUM(id), SUM(level) + FROM `/Root/tableWithNulls` + WHERE id > 5 + GROUP BY level + ORDER BY level; + )") + .SetExpectedReply("[[#;40;#]]"); + testCase.FillExpectedAggregationGroupByPlanOptions(); + + TestTableWithNulls({ testCase }); + } + + Y_UNIT_TEST(Aggregation_Sum_GroupByNullMix) { + TAggregationTestCase testCase; + testCase.SetQuery(R"( + SELECT + level, SUM(id), SUM(level) + FROM `/Root/tableWithNulls` + WHERE id >= 5 + GROUP BY level + ORDER BY level; + )") + .SetExpectedReply("[[#;40;#];[[5];5;[5]]]"); + testCase.FillExpectedAggregationGroupByPlanOptions(); + + TestTableWithNulls({ testCase }); + } + + Y_UNIT_TEST(Aggregation_SumL_GroupL_OrderL) { + TAggregationTestCase testCase; + testCase.SetQuery(R"( + SELECT + level, SUM(level) + FROM `/Root/olapStore/olapTable` + GROUP BY level + ORDER BY level + )") + .SetExpectedReply("[[[0];[0]];[[1];[4600]];[[2];[9200]];[[3];[13800]];[[4];[18400]]]"); + testCase.FillExpectedAggregationGroupByPlanOptions(); + + TestAggregations({ testCase }); + } + + Y_UNIT_TEST(Aggregation_MinL) { + TAggregationTestCase testCase; + testCase.SetQuery(R"( + SELECT + MIN(level) + FROM 
`/Root/olapStore/olapTable` + )") + .SetExpectedReply("[[[0]]]") +#if SSA_RUNTIME_VERSION >= 2U + .AddExpectedPlanOptions("TKqpOlapAgg"); +#else + .AddExpectedPlanOptions("CombineCore"); +#endif + + TestAggregations({ testCase }); + } + + Y_UNIT_TEST(Aggregation_MaxL) { + TAggregationTestCase testCase; + testCase.SetQuery(R"( + SELECT + MAX(level) + FROM `/Root/olapStore/olapTable` + )") + .SetExpectedReply("[[[4]]]") +#if SSA_RUNTIME_VERSION >= 2U + .AddExpectedPlanOptions("TKqpOlapAgg"); +#else + .AddExpectedPlanOptions("CombineCore"); +#endif + + TestAggregations({ testCase }); + } + + Y_UNIT_TEST(Aggregation_MinR_GroupL_OrderL) { + TAggregationTestCase testCase; + testCase.SetQuery(R"( + SELECT + level, MIN(resource_id) + FROM `/Root/olapStore/olapTable` + GROUP BY level + ORDER BY level + )") + .SetExpectedReply("[[[0];[\"10000\"]];[[1];[\"10001\"]];[[2];[\"10002\"]];[[3];[\"10003\"]];[[4];[\"10004\"]]]"); + testCase.FillExpectedAggregationGroupByPlanOptions(); + + TestAggregations({ testCase }); + } + + Y_UNIT_TEST(Aggregation_MaxR_GroupL_OrderL) { + TAggregationTestCase testCase; + testCase.SetQuery(R"( + SELECT + level, MAX(resource_id) + FROM `/Root/olapStore/olapTable` + GROUP BY level + ORDER BY level + )") + .SetExpectedReply("[[[0];[\"40995\"]];[[1];[\"40996\"]];[[2];[\"40997\"]];[[3];[\"40998\"]];[[4];[\"40999\"]]]"); + testCase.FillExpectedAggregationGroupByPlanOptions(); + + TestAggregations({ testCase }); + } + + Y_UNIT_TEST(Aggregation_ProjectionOrder) { + TAggregationTestCase testCase; + testCase.SetQuery(R"( + SELECT + resource_id, level, count(*) as c + FROM `/Root/olapStore/olapTable` + GROUP BY resource_id, level + ORDER BY c, resource_id DESC LIMIT 3 + )") + .SetExpectedReply("[[[\"40999\"];[4];1u];[[\"40998\"];[3];1u];[[\"40997\"];[2];1u]]") + .SetExpectedReadNodeType("Aggregate-TableFullScan"); + testCase.FillExpectedAggregationGroupByPlanOptions(); + TestAggregations({ testCase }); + } + + Y_UNIT_TEST(Aggregation_Some) { + 
TAggregationTestCase testCase; + testCase.SetQuery(R"( + SELECT SOME(level) FROM `/Root/tableWithNulls` WHERE id=1 + )") + .SetExpectedReply("[[[1]]]") +#if SSA_RUNTIME_VERSION >= 2U + .AddExpectedPlanOptions("TKqpOlapAgg"); +#else + .AddExpectedPlanOptions("CombineCore"); +#endif + TestTableWithNulls({ testCase }); + } + + Y_UNIT_TEST(Aggregation_Some_Null) { + TAggregationTestCase testCase; + testCase.SetQuery(R"( + SELECT SOME(level) FROM `/Root/tableWithNulls` WHERE id > 5 + )") + .SetExpectedReply("[[#]]") +#if SSA_RUNTIME_VERSION >= 2U + .AddExpectedPlanOptions("TKqpOlapAgg"); +#else + .AddExpectedPlanOptions("CombineCore"); +#endif + TestTableWithNulls({ testCase }); + } + + Y_UNIT_TEST(Aggregation_Some_GroupBy) { + TAggregationTestCase testCase; + testCase.SetQuery(R"( + SELECT + id, SOME(level) + FROM `/Root/tableWithNulls` + WHERE id BETWEEN 4 AND 5 + GROUP BY id + ORDER BY id; + )") + .SetExpectedReply("[[4;[4]];[5;[5]]]"); + testCase.FillExpectedAggregationGroupByPlanOptions(); + + TestTableWithNulls({ testCase }); + } + + Y_UNIT_TEST(Aggregation_Some_NullGroupBy) { + TAggregationTestCase testCase; + testCase.SetQuery(R"( + SELECT + id, SOME(level) + FROM `/Root/tableWithNulls` + WHERE id BETWEEN 6 AND 7 + GROUP BY id + ORDER BY id; + )") + .SetExpectedReply("[[6;#];[7;#]]"); + testCase.FillExpectedAggregationGroupByPlanOptions(); + + TestTableWithNulls({ testCase }); + } + + Y_UNIT_TEST(Aggregation_Some_NullMixGroupBy) { + TAggregationTestCase testCase; + testCase.SetQuery(R"( + SELECT + id, SOME(level) + FROM `/Root/tableWithNulls` + WHERE id > 4 AND id < 7 + GROUP BY id + ORDER BY id; + )") + .SetExpectedReply("[[5;[5]];[6;#]]"); + testCase.FillExpectedAggregationGroupByPlanOptions(); + + TestTableWithNulls({ testCase }); + } + + Y_UNIT_TEST(Aggregation_Some_GroupByNullMix) { + TAggregationTestCase testCase; + testCase.SetQuery(R"( + SELECT + level, SOME(id), SOME(level) + FROM `/Root/tableWithNulls` + WHERE id BETWEEN 5 AND 6 + GROUP BY level + 
ORDER BY level; + )") + .SetExpectedReply("[[#;6;#];[[5];5;[5]]]"); + testCase.FillExpectedAggregationGroupByPlanOptions(); + + TestTableWithNulls({ testCase }); + } + + Y_UNIT_TEST(Aggregation_Some_GroupByNull) { + TAggregationTestCase testCase; + testCase.SetQuery(R"( + SELECT + level, SOME(id), SOME(level) + FROM `/Root/tableWithNulls` + WHERE id = 6 + GROUP BY level + ORDER BY level; + )") + .SetExpectedReply("[[#;6;#]]"); + testCase.FillExpectedAggregationGroupByPlanOptions(); + + TestTableWithNulls({ testCase }); + } + + Y_UNIT_TEST(NoErrorOnLegacyPragma) { + TAggregationTestCase testCase; + testCase.SetQuery(R"( + PRAGMA Kikimr.KqpPushOlapProcess = "false"; + SELECT id, resource_id FROM `/Root/tableWithNulls` + WHERE + level = 5; + )") + .SetExpectedReply("[[5;#]]") + .AddExpectedPlanOptions("KqpOlapFilter"); + + TestTableWithNulls({ testCase }); + } + + Y_UNIT_TEST(BlocksRead) { + TAggregationTestCase testCase; + testCase.SetQuery(R"( + PRAGMA UseBlocks; + PRAGMA Kikimr.OptEnableOlapPushdown = "false"; + + SELECT + id, resource_id + FROM `/Root/tableWithNulls` + WHERE + level = 5; + )") + .SetExpectedReply("[[5;#]]"); + + TestTableWithNulls({ testCase }); + } + + Y_UNIT_TEST(Blocks_NoAggPushdown) { + TAggregationTestCase testCase; + testCase.SetQuery(R"( + PRAGMA UseBlocks; + SELECT + COUNT(DISTINCT id) + FROM `/Root/tableWithNulls`; + )") + .SetExpectedReply("[[10u]]"); + + TestTableWithNulls({ testCase }); + } + + Y_UNIT_TEST(Json_GetValue) { + TAggregationTestCase testCase; + testCase.SetQuery(R"( + SELECT id, JSON_VALUE(jsonval, "$.col1"), JSON_VALUE(jsondoc, "$.col1") FROM `/Root/tableWithNulls` + WHERE JSON_VALUE(jsonval, "$.col1") = "val1" AND id = 1; + )") +#if SSA_RUNTIME_VERSION >= 3U + .AddExpectedPlanOptions("KqpOlapJsonValue") +#else + .AddExpectedPlanOptions("Udf") +#endif + .SetExpectedReply(R"([[1;["val1"];#]])"); + + TestTableWithNulls({testCase}); + } + + Y_UNIT_TEST(Json_GetValue_Minus) { + TAggregationTestCase testCase; + 
testCase.SetQuery(R"( + SELECT id, JSON_VALUE(jsonval, "$.'col-abc'"), JSON_VALUE(jsondoc, "$.'col-abc'") FROM `/Root/tableWithNulls` + WHERE JSON_VALUE(jsonval, "$.'col-abc'") = "val-abc" AND id = 1; + )") +#if SSA_RUNTIME_VERSION >= 3U + .AddExpectedPlanOptions("KqpOlapJsonValue") +#else + .AddExpectedPlanOptions("Udf") +#endif + .SetExpectedReply(R"([[1;["val-abc"];#]])"); + + TestTableWithNulls({testCase}); + } + + Y_UNIT_TEST(Json_GetValue_ToString) { + TAggregationTestCase testCase; + testCase.SetQuery(R"( + SELECT id, JSON_VALUE(jsonval, "$.col1" RETURNING String), JSON_VALUE(jsondoc, "$.col1") FROM `/Root/tableWithNulls` + WHERE JSON_VALUE(jsonval, "$.col1" RETURNING String) = "val1" AND id = 1; + )") +#if SSA_RUNTIME_VERSION >= 3U + .AddExpectedPlanOptions("KqpOlapJsonValue") +#else + .AddExpectedPlanOptions("Udf") +#endif + .SetExpectedReply(R"([[1;["val1"];#]])"); + + TestTableWithNulls({ testCase }); + } + + Y_UNIT_TEST(Json_GetValue_ToInt) { + TAggregationTestCase testCase; + testCase.SetQuery(R"( + SELECT id, JSON_VALUE(jsonval, "$.obj.obj_col2_int" RETURNING Int), JSON_VALUE(jsondoc, "$.obj.obj_col2_int" RETURNING Int) FROM `/Root/tableWithNulls` + WHERE JSON_VALUE(jsonval, "$.obj.obj_col2_int" RETURNING Int) = 16 AND id = 1; + )") +#if SSA_RUNTIME_VERSION >= 3U + .AddExpectedPlanOptions("KqpOlapJsonValue") +#else + .AddExpectedPlanOptions("Udf") +#endif + .SetExpectedReply(R"([[1;[16];#]])"); + + TestTableWithNulls({ testCase }); + } + + Y_UNIT_TEST(JsonDoc_GetValue) { + TAggregationTestCase testCase; + testCase.SetQuery(R"( + SELECT id, JSON_VALUE(jsonval, "$.col1"), JSON_VALUE(jsondoc, "$.col1") FROM `/Root/tableWithNulls` + WHERE JSON_VALUE(jsondoc, "$.col1") = "val1" AND id = 6; + )") +#if SSA_RUNTIME_VERSION >= 3U + .AddExpectedPlanOptions("KqpOlapJsonValue") +#else + .AddExpectedPlanOptions("Udf") +#endif + .SetExpectedReply(R"([[6;#;["val1"]]])"); + + TestTableWithNulls({ testCase }); + } + + Y_UNIT_TEST(JsonDoc_GetValue_ToString) { + 
TAggregationTestCase testCase; + testCase.SetQuery(R"( + SELECT id, JSON_VALUE(jsonval, "$.col1"), JSON_VALUE(jsondoc, "$.col1" RETURNING String) FROM `/Root/tableWithNulls` + WHERE JSON_VALUE(jsondoc, "$.col1" RETURNING String) = "val1" AND id = 6; + )") +#if SSA_RUNTIME_VERSION >= 3U + .AddExpectedPlanOptions("KqpOlapJsonValue") +#else + .AddExpectedPlanOptions("Udf") +#endif + .SetExpectedReply(R"([[6;#;["val1"]]])"); + + TestTableWithNulls({ testCase }); + } + + Y_UNIT_TEST(JsonDoc_GetValue_ToInt) { + TAggregationTestCase testCase; + testCase.SetQuery(R"( + SELECT id, JSON_VALUE(jsonval, "$.obj.obj_col2_int"), JSON_VALUE(jsondoc, "$.obj.obj_col2_int" RETURNING Int) FROM `/Root/tableWithNulls` + WHERE JSON_VALUE(jsondoc, "$.obj.obj_col2_int" RETURNING Int) = 16 AND id = 6; + )") +#if SSA_RUNTIME_VERSION >= 3U + .AddExpectedPlanOptions("KqpOlapJsonValue") +#else + .AddExpectedPlanOptions("Udf") +#endif + .SetExpectedReply(R"([[6;#;[16]]])"); + + TestTableWithNulls({ testCase }); + } + + Y_UNIT_TEST(Json_Exists) { + TAggregationTestCase testCase; + testCase.SetQuery(R"( + SELECT id, JSON_EXISTS(jsonval, "$.col1"), JSON_EXISTS(jsondoc, "$.col1") FROM `/Root/tableWithNulls` + WHERE + JSON_EXISTS(jsonval, "$.col1") AND level = 1; + )") +#if SSA_RUNTIME_VERSION >= 3U + .AddExpectedPlanOptions("KqpOlapJsonExists") +#else + .AddExpectedPlanOptions("Udf") +#endif + .SetExpectedReply(R"([[1;[%true];#]])"); + + TestTableWithNulls({ testCase }); + } + + Y_UNIT_TEST(JsonDoc_Exists) { + TAggregationTestCase testCase; + testCase.SetQuery(R"( + SELECT id, JSON_EXISTS(jsonval, "$.col1"), JSON_EXISTS(jsondoc, "$.col1") FROM `/Root/tableWithNulls` + WHERE + JSON_EXISTS(jsondoc, "$.col1") AND id = 6; + )") +#if SSA_RUNTIME_VERSION >= 3U + .AddExpectedPlanOptions("KqpOlapJsonExists") +#else + .AddExpectedPlanOptions("Udf") +#endif + .SetExpectedReply(R"([[6;#;[%true]]])"); + + TestTableWithNulls({ testCase }); + } + + Y_UNIT_TEST(Json_Query) { + TAggregationTestCase testCase; + 
testCase.SetQuery(R"( + SELECT id, JSON_QUERY(jsonval, "$.col1" WITH UNCONDITIONAL WRAPPER), + JSON_QUERY(jsondoc, "$.col1" WITH UNCONDITIONAL WRAPPER) + FROM `/Root/tableWithNulls` + WHERE + level = 1; + )") + .AddExpectedPlanOptions("Udf") + .SetExpectedReply(R"([[1;["[\"val1\"]"];#]])"); + + TestTableWithNulls({ testCase }); + } + + Y_UNIT_TEST(BlockGenericWithDistinct) { + TAggregationTestCase testCase; + testCase.SetQuery(R"( + SELECT + COUNT(DISTINCT id) + FROM `/Root/tableWithNulls` + WHERE level = 5 AND Cast(id AS String) = "5"; + )") + .AddExpectedPlanOptions("KqpBlockReadOlapTableRanges") + .AddExpectedPlanOptions("WideFromBlocks") + .SetExpectedReply("[[1u]]"); + TestTableWithNulls({ testCase }, /* generic */ true); + } + + Y_UNIT_TEST(BlockGenericSimpleAggregation) { + TAggregationTestCase testCase; + testCase.SetQuery(R"( + SELECT + level, COUNT(*), SUM(id) + FROM `/Root/tableWithNulls` + WHERE level = 5 + GROUP BY level + ORDER BY level; + )") + .AddExpectedPlanOptions("KqpBlockReadOlapTableRanges") + .AddExpectedPlanOptions("WideFromBlocks") + .SetExpectedReply(R"([[[5];1u;5]])"); + + TestTableWithNulls({ testCase }, /* generic */ true); + } + + Y_UNIT_TEST(BlockGenericSelectAll) { + TAggregationTestCase testCase; + testCase.SetQuery(R"( + SELECT + id, resource_id, level + FROM `/Root/tableWithNulls` + WHERE level != 5 OR level IS NULL + ORDER BY id, resource_id, level; + )") + .AddExpectedPlanOptions("KqpBlockReadOlapTableRanges") + .AddExpectedPlanOptions("WideFromBlocks") + .SetExpectedReply(R"([[1;#;[1]];[2;#;[2]];[3;#;[3]];[4;#;[4]];[6;["6"];#];[7;["7"];#];[8;["8"];#];[9;["9"];#];[10;["10"];#]])"); + + TestTableWithNulls({ testCase }, /* generic */ true); + } +} + +} diff --git a/ydb/core/kqp/ut/olap/blobs_sharing_ut.cpp b/ydb/core/kqp/ut/olap/blobs_sharing_ut.cpp new file mode 100644 index 000000000000..caa39a4c35b5 --- /dev/null +++ b/ydb/core/kqp/ut/olap/blobs_sharing_ut.cpp @@ -0,0 +1,266 @@ +#include "helpers/typed_local.h" +#include 
+#include +#include +#include +#include +#include +#include +#include + +namespace NKikimr::NKqp { + +Y_UNIT_TEST_SUITE(KqpOlapBlobsSharing) { + + namespace { + class TTransferStatus { + private: + YDB_ACCESSOR(bool, Proposed, false); + YDB_ACCESSOR(bool, Confirmed, false); + YDB_ACCESSOR(bool, Finished, false); + public: + void Reset() { + Confirmed = false; + Proposed = false; + Finished = false; + } + }; + + static TMutex CSTransferStatusesMutex; + static std::shared_ptr CSTransferStatus = std::make_shared(); + } + + class TTestController: public NOlap::NDataSharing::IInitiatorController { + private: + static const inline auto Registrator = TFactory::TRegistrator("test"); + protected: + virtual void DoProposeError(const TString& sessionId, const TString& message) const override { + AFL_VERIFY(false)("session_id", sessionId)("message", message); + } + virtual void DoProposeSuccess(const TString& sessionId) const override { + CSTransferStatus->SetProposed(true); + AFL_NOTICE(NKikimrServices::TX_COLUMNSHARD)("event", "sharing_proposed")("session_id", sessionId); + } + virtual void DoConfirmSuccess(const TString& sessionId) const override { + CSTransferStatus->SetConfirmed(true); + AFL_NOTICE(NKikimrServices::TX_COLUMNSHARD)("event", "sharing_confirmed")("session_id", sessionId); + } + virtual void DoFinished(const TString& sessionId) const override { + CSTransferStatus->SetFinished(true); + AFL_NOTICE(NKikimrServices::TX_COLUMNSHARD)("event", "sharing_finished")("session_id", sessionId); + } + virtual void DoStatus(const NOlap::NDataSharing::TStatusContainer& status) const override { + AFL_NOTICE(NKikimrServices::TX_COLUMNSHARD)("event", "status")("info", status.SerializeToProto().DebugString()); + } + virtual TConclusionStatus DoDeserializeFromProto(const NKikimrColumnShardDataSharingProto::TInitiator::TController& /*proto*/) override { + return TConclusionStatus::Success(); + } + virtual void 
DoSerializeToProto(NKikimrColumnShardDataSharingProto::TInitiator::TController& /*proto*/) const override { + + } + + virtual TString GetClassName() const override { + return "test"; + } + }; + + class TSharingDataTestCase { + private: + const ui32 ShardsCount; + TKikimrRunner& Kikimr; + TTypedLocalHelper Helper; + NYDBTest::TControllers::TGuard Controller; + std::vector ShardIds; + std::vector PathIds; + YDB_ACCESSOR(bool, RebootTablet, false); + public: + const TTypedLocalHelper& GetHelper() const { + return Helper; + } + + void AddRecords(const ui32 recordsCount, const double kff = 0) { + Helper.FillPKOnly(kff, recordsCount); + } + + TSharingDataTestCase(const ui32 shardsCount, TKikimrRunner& kikimr) + : ShardsCount(shardsCount) + , Kikimr(kikimr) + , Helper("", Kikimr, "olapTable", "olapStore12") + , Controller(NYDBTest::TControllers::RegisterCSControllerGuard()) { + Controller->SetCompactionControl(NYDBTest::EOptimizerCompactionWeightControl::Disable); + Controller->SetExpectedShardsCount(ShardsCount); + Controller->SetPeriodicWakeupActivationPeriod(TDuration::Seconds(1)); + Controller->SetReadTimeoutClean(TDuration::Seconds(1)); + + Tests::NCommon::TLoggerInit(Kikimr).SetComponents({NKikimrServices::TX_COLUMNSHARD}, "CS").Initialize(); + + Helper.CreateTestOlapTable(ShardsCount, ShardsCount); + ShardIds = Controller->GetShardActualIds(); + AFL_VERIFY(ShardIds.size() == ShardsCount)("count", ShardIds.size())("ids", JoinSeq(",", ShardIds)); + std::set pathIdsSet; + for (auto&& i : ShardIds) { + auto pathIds = Controller->GetPathIds(i); + pathIdsSet.insert(pathIds.begin(), pathIds.end()); + } + PathIds = std::vector(pathIdsSet.begin(), pathIdsSet.end()); + AFL_VERIFY(PathIds.size() == 1)("count", PathIds.size())("ids", JoinSeq(",", PathIds)); + } + + void WaitNormalization() { + Controller->SetCompactionControl(NYDBTest::EOptimizerCompactionWeightControl::Force); + const auto start = TInstant::Now(); + while (!Controller->IsTrivialLinks() && TInstant::Now() - 
start < TDuration::Seconds(30)) { + Cerr << "WAIT_TRIVIAL_LINKS..." << Endl; + Sleep(TDuration::Seconds(1)); + } + AFL_VERIFY(Controller->IsTrivialLinks()); + Controller->CheckInvariants(); + } + + void Execute(const ui64 destinationIdx, const std::vector& sourceIdxs, const bool move, const NOlap::TSnapshot& snapshot, const std::set& pathIdxs) { + AFL_VERIFY(destinationIdx < ShardIds.size()); + const ui64 destination = ShardIds[destinationIdx]; + std::vector sources; + for (auto&& i : sourceIdxs) { + AFL_VERIFY(i < ShardIds.size()); + sources.emplace_back(ShardIds[i]); + } + std::set pathIds; + for (auto&& i : pathIdxs) { + AFL_VERIFY(i < PathIds.size()); + AFL_VERIFY(pathIds.emplace(PathIds[i]).second); + } + Cerr << "SHARING: " << JoinSeq(",", sources) << "->" << destination << Endl; + THashMap pathIdsRemap; + for (auto&& i : pathIds) { + pathIdsRemap.emplace(i, i); + } + THashSet sourceTablets; + for (auto&& i : sources) { + AFL_VERIFY(sourceTablets.emplace((NOlap::TTabletId)i).second); + } + const TString sessionId = TGUID::CreateTimebased().AsUuidString(); + NOlap::NDataSharing::TTransferContext transferContext((NOlap::TTabletId)destination, sourceTablets, snapshot, move); + NOlap::NDataSharing::TDestinationSession session(std::make_shared(), pathIdsRemap, sessionId, transferContext); + Kikimr.GetTestServer().GetRuntime()->Send(MakePipePeNodeCacheID(false), NActors::TActorId(), new TEvPipeCache::TEvForward( + new NOlap::NDataSharing::NEvents::TEvProposeFromInitiator(session), destination, false)); + { + const TInstant start = TInstant::Now(); + while (!CSTransferStatus->GetProposed() && TInstant::Now() - start < TDuration::Seconds(10)) { + Sleep(TDuration::Seconds(1)); + Cerr << "WAIT_PROPOSING..." 
<< Endl; + } + AFL_VERIFY(CSTransferStatus->GetProposed()); + } + if (RebootTablet) { + Kikimr.GetTestServer().GetRuntime()->Send(MakePipePeNodeCacheID(false), NActors::TActorId(), new TEvPipeCache::TEvForward( + new TEvents::TEvPoisonPill(), destination, false)); + } + { + const TInstant start = TInstant::Now(); + while (!CSTransferStatus->GetConfirmed() && TInstant::Now() - start < TDuration::Seconds(10)) { + Kikimr.GetTestServer().GetRuntime()->Send(MakePipePeNodeCacheID(false), NActors::TActorId(), new TEvPipeCache::TEvForward( + new NOlap::NDataSharing::NEvents::TEvConfirmFromInitiator(sessionId), destination, false)); + Sleep(TDuration::Seconds(1)); + Cerr << "WAIT_CONFIRMED..." << Endl; + } + AFL_VERIFY(CSTransferStatus->GetConfirmed()); + } + if (RebootTablet) { + Kikimr.GetTestServer().GetRuntime()->Send(MakePipePeNodeCacheID(false), NActors::TActorId(), new TEvPipeCache::TEvForward( + new TEvents::TEvPoisonPill(), destination, false)); + for (auto&& i : sources) { + Kikimr.GetTestServer().GetRuntime()->Send(MakePipePeNodeCacheID(false), NActors::TActorId(), new TEvPipeCache::TEvForward( + new TEvents::TEvPoisonPill(), i, false)); + } + } + { + const TInstant start = TInstant::Now(); + while (!CSTransferStatus->GetFinished() && TInstant::Now() - start < TDuration::Seconds(10)) { + Sleep(TDuration::Seconds(1)); + Cerr << "WAIT_FINISHED..." 
<< Endl; + } + AFL_VERIFY(CSTransferStatus->GetFinished()); + } + CSTransferStatus->Reset(); + AFL_VERIFY(!Controller->IsTrivialLinks()); + } + }; + + Y_UNIT_TEST(BlobsSharingSplit1_1) { + auto settings = TKikimrSettings().SetWithSampleTables(false); + TKikimrRunner kikimr(settings); + TSharingDataTestCase tester(4, kikimr); + tester.AddRecords(800000); + Sleep(TDuration::Seconds(1)); + tester.Execute(0, {1}, false, NOlap::TSnapshot(TInstant::Now().MilliSeconds(), 1232123), {0}); + } + + Y_UNIT_TEST(BlobsSharingSplit1_1_clean) { + auto settings = TKikimrSettings().SetWithSampleTables(false); + TKikimrRunner kikimr(settings); + TSharingDataTestCase tester(2, kikimr); + tester.AddRecords(80000); + CompareYson(tester.GetHelper().GetQueryResult("SELECT COUNT(*) FROM `/Root/olapStore12/olapTable`"), R"([[80000u;]])"); + Sleep(TDuration::Seconds(1)); + tester.Execute(0, {1}, false, NOlap::TSnapshot(TInstant::Now().MilliSeconds(), 1232123), {0}); + CompareYson(tester.GetHelper().GetQueryResult("SELECT COUNT(*) FROM `/Root/olapStore12/olapTable`"), R"([[119928u;]])"); + tester.AddRecords(80000, 0.8); + tester.WaitNormalization(); + CompareYson(tester.GetHelper().GetQueryResult("SELECT COUNT(*) FROM `/Root/olapStore12/olapTable`"), R"([[183928u;]])"); + } + + Y_UNIT_TEST(BlobsSharingSplit1_1_clean_with_restarts) { + auto settings = TKikimrSettings().SetWithSampleTables(false); + TKikimrRunner kikimr(settings); + TSharingDataTestCase tester(2, kikimr); + tester.SetRebootTablet(true); + tester.AddRecords(80000); + CompareYson(tester.GetHelper().GetQueryResult("SELECT COUNT(*) FROM `/Root/olapStore12/olapTable`"), R"([[80000u;]])"); + Sleep(TDuration::Seconds(1)); + tester.Execute(0, {1}, false, NOlap::TSnapshot(TInstant::Now().MilliSeconds(), 1232123), {0}); + CompareYson(tester.GetHelper().GetQueryResult("SELECT COUNT(*) FROM `/Root/olapStore12/olapTable`"), R"([[119928u;]])"); + tester.AddRecords(80000, 0.8); + tester.WaitNormalization(); + 
CompareYson(tester.GetHelper().GetQueryResult("SELECT COUNT(*) FROM `/Root/olapStore12/olapTable`"), R"([[183928u;]])"); + } + + Y_UNIT_TEST(BlobsSharingSplit3_1) { + auto settings = TKikimrSettings().SetWithSampleTables(false); + TKikimrRunner kikimr(settings); + TSharingDataTestCase tester(4, kikimr); + tester.AddRecords(800000); + Sleep(TDuration::Seconds(1)); + tester.Execute(0, {1, 2, 3}, false, NOlap::TSnapshot(TInstant::Now().MilliSeconds(), 1232123), {0}); + } + + Y_UNIT_TEST(BlobsSharingSplit1_3_1) { + auto settings = TKikimrSettings().SetWithSampleTables(false); + TKikimrRunner kikimr(settings); + TSharingDataTestCase tester(4, kikimr); + tester.AddRecords(800000); + Sleep(TDuration::Seconds(1)); + tester.Execute(1, {0}, false, NOlap::TSnapshot(TInstant::Now().MilliSeconds(), 1232123), {0}); + tester.Execute(2, {0}, false, NOlap::TSnapshot(TInstant::Now().MilliSeconds(), 1232123), {0}); + tester.Execute(3, {0}, false, NOlap::TSnapshot(TInstant::Now().MilliSeconds(), 1232123), {0}); + tester.Execute(0, {1, 2, 3}, false, NOlap::TSnapshot(TInstant::Now().MilliSeconds(), 1232123), {0}); + } + + Y_UNIT_TEST(BlobsSharingSplit1_3_2_1_clean) { + auto settings = TKikimrSettings().SetWithSampleTables(false); + TKikimrRunner kikimr(settings); + TSharingDataTestCase tester(4, kikimr); + tester.AddRecords(800000); + Sleep(TDuration::Seconds(1)); + tester.Execute(1, {0}, false, NOlap::TSnapshot(TInstant::Now().MilliSeconds(), 1232123), {0}); + tester.Execute(2, {0}, false, NOlap::TSnapshot(TInstant::Now().MilliSeconds(), 1232123), {0}); + tester.Execute(3, {0}, false, NOlap::TSnapshot(TInstant::Now().MilliSeconds(), 1232123), {0}); + tester.AddRecords(800000, 0.9); + Sleep(TDuration::Seconds(1)); + tester.Execute(3, {2}, false, NOlap::TSnapshot(TInstant::Now().MilliSeconds(), 1232123), {0}); + tester.Execute(0, {1, 2}, false, NOlap::TSnapshot(TInstant::Now().MilliSeconds(), 1232123), {0}); + tester.WaitNormalization(); + } +} + +} diff --git 
a/ydb/core/kqp/ut/olap/clickbench_ut.cpp b/ydb/core/kqp/ut/olap/clickbench_ut.cpp new file mode 100644 index 000000000000..9aaff9a75c53 --- /dev/null +++ b/ydb/core/kqp/ut/olap/clickbench_ut.cpp @@ -0,0 +1,247 @@ +#include "helpers/aggregation.h" + +#include +#include +#include +#include + +namespace NKikimr::NKqp { + +Y_UNIT_TEST_SUITE(KqpOlapClickbench) { + + class TClickHelper : public Tests::NCS::TCickBenchHelper { + private: + using TBase = Tests::NCS::TCickBenchHelper; + public: + using TBase::TBase; + + TClickHelper(TKikimrRunner& runner) + : TBase(runner.GetTestServer()) + {} + + void CreateClickBenchTable(TString tableName = "benchTable", ui32 shardsCount = 4) { + TActorId sender = Server.GetRuntime()->AllocateEdgeActor(); + + TBase::CreateTestOlapTable(sender, "", Sprintf(R"( + Name: "%s" + ColumnShardCount: %d + Schema { + %s + } + Sharding { + HashSharding { + Function: HASH_FUNCTION_CONSISTENCY_64 + Columns: "EventTime" + } + })", tableName.c_str(), shardsCount, PROTO_SCHEMA)); + } + }; + + void WriteTestDataForClickBench(TKikimrRunner& kikimr, TString testTable, ui64 pathIdBegin, ui64 tsBegin, size_t rowCount) { + UNIT_ASSERT(testTable == "/Root/benchTable"); // TODO: check schema instead + TClickHelper lHelper(kikimr.GetTestServer()); + auto batch = lHelper.TestArrowBatch(pathIdBegin, tsBegin, rowCount); + lHelper.SendDataViaActorSystem(testTable, batch); + } + + void TestClickBenchBase(const std::vector& cases, const bool genericQuery) { + auto settings = TKikimrSettings() + .SetWithSampleTables(false) + .SetForceColumnTablesCompositeMarks(true); + TKikimrRunner kikimr(settings); + + TClickHelper(kikimr).CreateClickBenchTable(); + auto tableClient = kikimr.GetTableClient(); + + + ui32 numIterations = 10; + const ui32 iterationPackSize = NSan::PlainOrUnderSanitizer(2000, 20); + for (ui64 i = 0; i < numIterations; ++i) { + WriteTestDataForClickBench(kikimr, "/Root/benchTable", 0, 1000000 + i * 1000000, iterationPackSize); + } + + if (!genericQuery) { 
+ auto tableClient = kikimr.GetTableClient(); + for (auto&& i : cases) { + const TString queryFixed = i.GetFixedQuery(); + RunTestCaseWithClient(i, tableClient); + CheckPlanForAggregatePushdown(queryFixed, tableClient, i.GetExpectedPlanOptions(), i.GetExpectedReadNodeType()); + } + } else { + auto queryClient = kikimr.GetQueryClient(); + for (auto&& i : cases) { + const TString queryFixed = i.GetFixedQuery(); + RunTestCaseWithClient(i, queryClient); + CheckPlanForAggregatePushdown(queryFixed, queryClient, i.GetExpectedPlanOptions(), i.GetExpectedReadNodeType()); + } + } + } + + void TestClickBenchInternal(const std::vector& cases) { + TPortManager tp; + ui16 mbusport = tp.GetPort(2134); + auto settings = Tests::TServerSettings(mbusport) + .SetDomainName("Root") + .SetUseRealThreads(false) + .SetNodeCount(2); + + Tests::TServer::TPtr server = new Tests::TServer(settings); + + auto runtime = server->GetRuntime(); + auto sender = runtime->AllocateEdgeActor(); + + InitRoot(server, sender); + + TClickHelper(*server).CreateClickBenchTable(); + + // write data + + ui32 numIterations = 10; + const ui32 iterationPackSize = NSan::PlainOrUnderSanitizer(2000, 20); + for (ui64 i = 0; i < numIterations; ++i) { + TClickHelper(*server).SendDataViaActorSystem("/Root/benchTable", 0, 1000000 + i * 1000000, + iterationPackSize); + } + + TAggregationTestCase currentTest; + auto captureEvents = [&](TAutoPtr& ev) -> auto { + switch (ev->GetTypeRewrite()) { + case NKqp::TKqpComputeEvents::EvScanData: + { + auto* msg = ev->Get(); + Y_ABORT_UNLESS(currentTest.MutableLimitChecker().CheckExpectedLimitOnScanData(msg->ArrowBatch ? msg->ArrowBatch->num_rows() : 0)); + Y_ABORT_UNLESS(currentTest.MutableRecordChecker().CheckExpectedOnScanData(msg->ArrowBatch ? 
msg->ArrowBatch->num_columns() : 0)); + break; + } + case TEvDataShard::EvKqpScan: + { + auto* msg = ev->Get(); + Y_ABORT_UNLESS(currentTest.MutableLimitChecker().CheckExpectedLimitOnScanTask(msg->Record.GetItemsLimit())); + break; + } + } + return TTestActorRuntime::EEventAction::PROCESS; + }; + runtime->SetObserverFunc(captureEvents); + + // selects + + for (auto&& i : cases) { + const TString queryFixed = i.GetFixedQuery(); + currentTest = i; + auto streamSender = runtime->AllocateEdgeActor(); + NDataShard::NKqpHelpers::SendRequest(*runtime, streamSender, NDataShard::NKqpHelpers::MakeStreamRequest(streamSender, queryFixed, false)); + auto ev = runtime->GrabEdgeEventRethrow(streamSender, TDuration::Seconds(10)); + Y_ABORT_UNLESS(currentTest.CheckFinished()); + } + } + + void TestClickBench(const std::vector& cases, const bool genericQuery = false) { + TestClickBenchBase(cases, genericQuery); + if (!genericQuery) { + TestClickBenchInternal(cases); + } + } + + Y_UNIT_TEST(ClickBenchSmoke) { + TAggregationTestCase q7; + q7.SetQuery(R"( + SELECT + AdvEngineID, COUNT(*) as c + FROM `/Root/benchTable` + WHERE AdvEngineID != 0 + GROUP BY AdvEngineID + ORDER BY c DESC + )") + //.SetExpectedReply("[[[\"40999\"];[4];1u];[[\"40998\"];[3];1u];[[\"40997\"];[2];1u]]") + // Should be fixed in https://st.yandex-team.ru/KIKIMR-17009 + // .SetExpectedReadNodeType("TableFullScan"); + .SetExpectedReadNodeType("Aggregate-TableFullScan"); + q7.FillExpectedAggregationGroupByPlanOptions(); + + TAggregationTestCase q9; + q9.SetQuery(R"( + SELECT + RegionID, SUM(AdvEngineID), COUNT(*) AS c, avg(ResolutionWidth), COUNT(DISTINCT UserID) + FROM `/Root/benchTable` + GROUP BY RegionID + ORDER BY c DESC + LIMIT 10 + )") + //.SetExpectedReply("[[[\"40999\"];[4];1u];[[\"40998\"];[3];1u];[[\"40997\"];[2];1u]]") + // Should be fixed in https://st.yandex-team.ru/KIKIMR-17009 + // .SetExpectedReadNodeType("TableFullScan"); + .SetExpectedReadNodeType("Aggregate-TableFullScan"); + 
q9.FillExpectedAggregationGroupByPlanOptions(); + + TAggregationTestCase q12; + q12.SetQuery(R"( + SELECT + SearchPhrase, count(*) AS c + FROM `/Root/benchTable` + WHERE SearchPhrase != '' + GROUP BY SearchPhrase + ORDER BY c DESC + LIMIT 10; + )") + //.SetExpectedReply("[[[\"40999\"];[4];1u];[[\"40998\"];[3];1u];[[\"40997\"];[2];1u]]") + // Should be fixed in https://st.yandex-team.ru/KIKIMR-17009 + // .SetExpectedReadNodeType("TableFullScan"); + .SetExpectedReadNodeType("Aggregate-TableFullScan"); + q12.FillExpectedAggregationGroupByPlanOptions(); + + TAggregationTestCase q14; + q14.SetQuery(R"( + SELECT + SearchEngineID, SearchPhrase, count(*) AS c + FROM `/Root/benchTable` + WHERE SearchPhrase != '' + GROUP BY SearchEngineID, SearchPhrase + ORDER BY c DESC + LIMIT 10; + )") + //.SetExpectedReply("[[[\"40999\"];[4];1u];[[\"40998\"];[3];1u];[[\"40997\"];[2];1u]]") + // Should be fixed in https://st.yandex-team.ru/KIKIMR-17009 + // .SetExpectedReadNodeType("TableFullScan"); + .SetExpectedReadNodeType("Aggregate-TableFullScan"); + q14.FillExpectedAggregationGroupByPlanOptions(); + + TAggregationTestCase q22; + q22.SetQuery(R"( + SELECT + SearchPhrase, MIN(URL), MIN(Title), COUNT(*) AS c, COUNT(DISTINCT UserID) + FROM `/Root/benchTable` + WHERE Title LIKE '%Google%' AND URL NOT LIKE '%.google.%' AND SearchPhrase <> '' + GROUP BY SearchPhrase + ORDER BY c DESC + LIMIT 10; + )") + .AddExpectedPlanOptions("KqpOlapFilter") + .SetExpectedReadNodeType("Aggregate-TableFullScan"); + q22.FillExpectedAggregationGroupByPlanOptions(); + + TAggregationTestCase q39; + q39.SetQuery(R"( + SELECT TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst, COUNT(*) AS PageViews + FROM `/Root/benchTable` + WHERE CounterID = 62 AND EventDate >= Date('2013-07-01') AND EventDate <= Date('2013-07-31') AND IsRefresh == 0 + GROUP BY + TraficSourceID, SearchEngineID, AdvEngineID, IF (SearchEngineID = 0 AND AdvEngineID = 0, Referer, '') AS Src, + URL AS Dst + ORDER BY PageViews DESC + LIMIT 10; + 
)") + .AddExpectedPlanOptions("KqpOlapFilter") + .SetExpectedReadNodeType("Aggregate-Filter-TableFullScan"); + q39.FillExpectedAggregationGroupByPlanOptions(); + + std::vector cases = {q7, q9, q12, q14, q22, q39}; + for (auto&& c : cases) { + c.SetUseLlvm(NSan::PlainOrUnderSanitizer(true, false)); + } + + TestClickBench(cases); + } +} + +} diff --git a/ydb/core/kqp/ut/olap/helpers/aggregation.cpp b/ydb/core/kqp/ut/olap/helpers/aggregation.cpp new file mode 100644 index 000000000000..caee27934f5f --- /dev/null +++ b/ydb/core/kqp/ut/olap/helpers/aggregation.cpp @@ -0,0 +1,140 @@ +#include "aggregation.h" +#include +#include +#include +#include + +namespace NKikimr::NKqp { + +void TestAggregationsBase(const std::vector& cases) { + auto settings = TKikimrSettings() + .SetWithSampleTables(false) + .SetForceColumnTablesCompositeMarks(true); + TKikimrRunner kikimr(settings); + + TLocalHelper(kikimr).CreateTestOlapTable(); + auto tableClient = kikimr.GetTableClient(); + + { + WriteTestData(kikimr, "/Root/olapStore/olapTable", 10000, 3000000, 1000); + WriteTestData(kikimr, "/Root/olapStore/olapTable", 11000, 3001000, 1000); + WriteTestData(kikimr, "/Root/olapStore/olapTable", 12000, 3002000, 1000); + WriteTestData(kikimr, "/Root/olapStore/olapTable", 13000, 3003000, 1000); + WriteTestData(kikimr, "/Root/olapStore/olapTable", 14000, 3004000, 1000); + WriteTestData(kikimr, "/Root/olapStore/olapTable", 20000, 2000000, 7000); + WriteTestData(kikimr, "/Root/olapStore/olapTable", 30000, 1000000, 11000); + } + + for (auto&& i : cases) { + const TString queryFixed = i.GetFixedQuery(); + { + auto it = tableClient.StreamExecuteScanQuery(queryFixed).GetValueSync(); + UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); + TString result = StreamResultToYson(it); + if (!i.GetExpectedReply().empty()) { + CompareYson(result, i.GetExpectedReply()); + } + } + CheckPlanForAggregatePushdown(queryFixed, tableClient, i.GetExpectedPlanOptions(), i.GetExpectedReadNodeType()); + } +} + +void 
TestAggregationsInternal(const std::vector& cases) { + TPortManager tp; + ui16 mbusport = tp.GetPort(2134); + auto settings = Tests::TServerSettings(mbusport) + .SetDomainName("Root") + .SetUseRealThreads(false) + .SetNodeCount(2); + + Tests::TServer::TPtr server = new Tests::TServer(settings); + + auto runtime = server->GetRuntime(); + auto sender = runtime->AllocateEdgeActor(); + + InitRoot(server, sender); + Tests::NCommon::TLoggerInit(runtime).Initialize(); + + ui32 numShards = 1; + ui32 numIterations = 10; + TLocalHelper(*server).CreateTestOlapTable("olapTable", "olapStore", numShards, numShards); + const ui32 iterationPackSize = 2000; + for (ui64 i = 0; i < numIterations; ++i) { + TLocalHelper(*server).SendDataViaActorSystem("/Root/olapStore/olapTable", 0, 1000000 + i * 1000000, iterationPackSize); + } + + TAggregationTestCase currentTest; + auto captureEvents = [&](TAutoPtr& ev) -> auto { + switch (ev->GetTypeRewrite()) { + case NKqp::TKqpComputeEvents::EvScanData: + { + auto* msg = ev->Get(); + Y_ABORT_UNLESS(currentTest.MutableLimitChecker().CheckExpectedLimitOnScanData(msg->ArrowBatch ? msg->ArrowBatch->num_rows() : 0)); + Y_ABORT_UNLESS(currentTest.MutableRecordChecker().CheckExpectedOnScanData(msg->ArrowBatch ? 
msg->ArrowBatch->num_columns() : 0)); + break; + } + case TEvDataShard::EvKqpScan: + { + auto* msg = ev->Get(); + Y_ABORT_UNLESS(currentTest.MutableLimitChecker().CheckExpectedLimitOnScanTask(msg->Record.GetItemsLimit())); + break; + } + } + return TTestActorRuntime::EEventAction::PROCESS; + }; + runtime->SetObserverFunc(captureEvents); + + for (auto&& i : cases) { + const TString queryFixed = i.GetFixedQuery(); + currentTest = i; + auto streamSender = runtime->AllocateEdgeActor(); + NDataShard::NKqpHelpers::SendRequest(*runtime, streamSender, NDataShard::NKqpHelpers::MakeStreamRequest(streamSender, queryFixed, false)); + auto ev = runtime->GrabEdgeEventRethrow(streamSender, TDuration::Seconds(10)); + Y_ABORT_UNLESS(currentTest.CheckFinished()); + } +} + +void WriteTestDataForTableWithNulls(TKikimrRunner& kikimr, TString testTable) { + UNIT_ASSERT(testTable == "/Root/tableWithNulls"); // TODO: check schema instead + Tests::NCS::TTableWithNullsHelper lHelper(kikimr.GetTestServer()); + auto batch = lHelper.TestArrowBatch(); + lHelper.SendDataViaActorSystem(testTable, batch); +} + +void TestTableWithNulls(const std::vector& cases, const bool genericQuery /*= false*/) { + auto settings = TKikimrSettings() + .SetWithSampleTables(false) + .SetForceColumnTablesCompositeMarks(true); + TKikimrRunner kikimr(settings); + + Tests::NCommon::TLoggerInit(kikimr).Initialize(); + TTableWithNullsHelper(kikimr).CreateTableWithNulls(); + auto tableClient = kikimr.GetTableClient(); + + { + WriteTestDataForTableWithNulls(kikimr, "/Root/tableWithNulls"); + } + + if (!genericQuery) { + auto tableClient = kikimr.GetTableClient(); + for (auto&& i : cases) { + RunTestCaseWithClient(i, tableClient); + CheckPlanForAggregatePushdown(i.GetFixedQuery(), tableClient, + i.GetExpectedPlanOptions(), i.GetExpectedReadNodeType()); + } + } else { + auto queryClient = kikimr.GetQueryClient(); + for (auto&& i : cases) { + RunTestCaseWithClient(i, queryClient); + 
CheckPlanForAggregatePushdown(i.GetFixedQuery(), queryClient, + i.GetExpectedPlanOptions(), i.GetExpectedReadNodeType()); + } + } +} + +void TestAggregations(const std::vector& cases) { + TestAggregationsBase(cases); + TestAggregationsInternal(cases); +} + +} \ No newline at end of file diff --git a/ydb/core/kqp/ut/olap/helpers/aggregation.h b/ydb/core/kqp/ut/olap/helpers/aggregation.h new file mode 100644 index 000000000000..e83dcb32fd87 --- /dev/null +++ b/ydb/core/kqp/ut/olap/helpers/aggregation.h @@ -0,0 +1,245 @@ +#pragma once +#include "writer.h" +#include "local.h" + +namespace NKikimr::NKqp { + +class TExpectedLimitChecker { +private: + std::optional ExpectedLimit; + std::optional ExpectedResultCount; + ui32 CheckScanData = 0; + ui32 CheckScanTask = 0; +public: + TExpectedLimitChecker& SetExpectedLimit(const ui32 value) { + ExpectedLimit = value; + ExpectedResultCount = value; + return *this; + } + TExpectedLimitChecker& SetExpectedResultCount(const ui32 value) { + ExpectedResultCount = value; + return *this; + } + bool CheckExpectedLimitOnScanData(const ui32 resultCount) { + if (!ExpectedResultCount) { + return true; + } + ++CheckScanData; + UNIT_ASSERT_LE(resultCount, *ExpectedResultCount); + return true; + } + bool CheckExpectedLimitOnScanTask(const ui32 taskLimit) { + if (!ExpectedLimit) { + return true; + } + ++CheckScanTask; + UNIT_ASSERT_EQUAL(taskLimit, *ExpectedLimit); + return true; + } + bool CheckFinish() const { + if (!ExpectedLimit) { + return true; + } + return CheckScanData && CheckScanTask; + } +}; + +class TExpectedRecordChecker { +private: + std::optional ExpectedColumnsCount; + ui32 CheckScanData = 0; +public: + TExpectedRecordChecker& SetExpectedColumnsCount(const ui32 value) { + ExpectedColumnsCount = value; + return *this; + } + bool CheckExpectedOnScanData(const ui32 columnsCount) { + if (!ExpectedColumnsCount) { + return true; + } + ++CheckScanData; + UNIT_ASSERT_EQUAL(columnsCount, *ExpectedColumnsCount); + return true; + } + bool 
CheckFinish() const { + if (!ExpectedColumnsCount) { + return true; + } + return CheckScanData; + } +}; + +class TAggregationTestCase { +private: + TString Query; + TString ExpectedReply; + std::vector ExpectedPlanOptions; + bool Pushdown = true; + std::string ExpectedReadNodeType; + TExpectedLimitChecker LimitChecker; + TExpectedRecordChecker RecordChecker; + bool UseLlvm = true; +public: + void FillExpectedAggregationGroupByPlanOptions() { +#if SSA_RUNTIME_VERSION >= 2U + // AddExpectedPlanOptions("TKqpOlapAgg"); + AddExpectedPlanOptions("WideCombiner"); +#else + AddExpectedPlanOptions("CombineCore"); +#endif + } + TString GetFixedQuery() const { + TStringBuilder queryFixed; + queryFixed << "--!syntax_v1" << Endl; + if (!Pushdown) { + queryFixed << "PRAGMA Kikimr.OptEnableOlapPushdown = \"false\";" << Endl; + } + if (!UseLlvm) { + queryFixed << "PRAGMA Kikimr.UseLlvm = \"false\";" << Endl; + } + queryFixed << "PRAGMA Kikimr.OptUseFinalizeByKey;" << Endl; + + queryFixed << Query << Endl; + Cerr << "REQUEST:\n" << queryFixed << Endl; + return queryFixed; + } + TAggregationTestCase() = default; + TExpectedLimitChecker& MutableLimitChecker() { + return LimitChecker; + } + TExpectedRecordChecker& MutableRecordChecker() { + return RecordChecker; + } + bool GetPushdown() const { + return Pushdown; + } + TAggregationTestCase& SetPushdown(const bool value = true) { + Pushdown = value; + return *this; + } + bool CheckFinished() const { + return LimitChecker.CheckFinish(); + } + + const TString& GetQuery() const { + return Query; + } + TAggregationTestCase& SetQuery(const TString& value) { + Query = value; + return *this; + } + TAggregationTestCase& SetUseLlvm(const bool value) { + UseLlvm = value; + return *this; + } + const TString& GetExpectedReply() const { + return ExpectedReply; + } + TAggregationTestCase& SetExpectedReply(const TString& value) { + ExpectedReply = value; + return *this; + } + + TAggregationTestCase& AddExpectedPlanOptions(const std::string& value) { + 
ExpectedPlanOptions.emplace_back(value); + return *this; + } + + const std::vector& GetExpectedPlanOptions() const { + return ExpectedPlanOptions; + } + + TAggregationTestCase& SetExpectedReadNodeType(const std::string& value) { + ExpectedReadNodeType = value; + return *this; + } + + const std::string& GetExpectedReadNodeType() const { + return ExpectedReadNodeType; + } +}; + +template +auto StreamExplainQuery(const TString& query, TClient& client) { + if constexpr (std::is_same_v) { + NYdb::NTable::TStreamExecScanQuerySettings scanSettings; + scanSettings.Explain(true); + return client.StreamExecuteScanQuery(query, scanSettings).GetValueSync(); + } else { + NYdb::NQuery::TExecuteQuerySettings scanSettings; + scanSettings.ExecMode(NYdb::NQuery::EExecMode::Explain); + return client.StreamExecuteQuery(query, NYdb::NQuery::TTxControl::BeginTx().CommitTx(), scanSettings).GetValueSync(); + } +} + +template +void CheckPlanForAggregatePushdown( + const TString& query, + TClient& client, + const std::vector& expectedPlanNodes, + const std::string& readNodeType) { + auto res = StreamExplainQuery(query, client); + UNIT_ASSERT_C(res.IsSuccess(), res.GetIssues().ToString()); + + auto planRes = CollectStreamResult(res); + auto ast = planRes.QueryStats->Getquery_ast(); + Cerr << "JSON Plan:" << Endl; + Cerr << planRes.PlanJson.GetOrElse("NO_PLAN") << Endl; + Cerr << "AST:" << Endl; + Cerr << ast << Endl; + for (auto planNode : expectedPlanNodes) { + UNIT_ASSERT_C(ast.find(planNode) != std::string::npos, + TStringBuilder() << planNode << " was not found. Query: " << query); + } + UNIT_ASSERT_C(ast.find("SqueezeToDict") == std::string::npos, TStringBuilder() << "SqueezeToDict denied for aggregation requests. 
Query: " << query); + + if (!readNodeType.empty()) { + NJson::TJsonValue planJson; + NJson::ReadJsonTree(*planRes.PlanJson, &planJson, true); + auto readNode = FindPlanNodeByKv(planJson, "Node Type", readNodeType.c_str()); + UNIT_ASSERT(readNode.IsDefined()); + + auto& operators = readNode.GetMapSafe().at("Operators").GetArraySafe(); + for (auto& op : operators) { + if (op.GetMapSafe().at("Name") == "TableFullScan") { + auto ssaProgram = op.GetMapSafe().at("SsaProgram"); + UNIT_ASSERT(ssaProgram.IsDefined()); + UNIT_ASSERT(FindPlanNodes(ssaProgram, "Projection").size()); + break; + } + } + } +} + +void TestAggregationsBase(const std::vector& cases); + +void TestAggregationsInternal(const std::vector& cases); + +void TestAggregations(const std::vector& cases); + +template +auto StreamExecuteQuery(const TAggregationTestCase& testCase, TClient& client) { + if constexpr (std::is_same_v) { + return client.StreamExecuteScanQuery(testCase.GetFixedQuery()).GetValueSync(); + } else { + return client.StreamExecuteQuery( + testCase.GetFixedQuery(), + NYdb::NQuery::TTxControl::BeginTx().CommitTx()).GetValueSync(); + } +} + +template +void RunTestCaseWithClient(const TAggregationTestCase& testCase, TClient& client) { + auto it = StreamExecuteQuery(testCase, client); + UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); + TString result = StreamResultToYson(it); + if (!testCase.GetExpectedReply().empty()) { + CompareYson(result, testCase.GetExpectedReply()); + } +} + +void WriteTestDataForTableWithNulls(TKikimrRunner& kikimr, TString testTable); + +void TestTableWithNulls(const std::vector& cases, const bool genericQuery = false); + +} \ No newline at end of file diff --git a/ydb/core/kqp/ut/olap/helpers/get_value.cpp b/ydb/core/kqp/ut/olap/helpers/get_value.cpp new file mode 100644 index 000000000000..208e5e40e02e --- /dev/null +++ b/ydb/core/kqp/ut/olap/helpers/get_value.cpp @@ -0,0 +1,112 @@ +#include "get_value.h" +#include + +namespace NKikimr::NKqp { + +void 
PrintValue(IOutputStream& out, const NYdb::TValue& v) { // writes a text form of v to out
+    NYdb::TValueParser value(v);
+
+    while (value.GetKind() == NYdb::TTypeParser::ETypeKind::Optional) { // peel optional wrappers one level at a time
+        if (value.IsNull()) {
+            out << "";
+            return;
+        } else {
+            value.OpenOptional();
+        }
+    }
+
+    if (value.IsNull()) {
+        out << "";
+        return;
+    }
+
+    switch (value.GetPrimitiveType()) { // only the primitive types listed below are supported
+        case NYdb::EPrimitiveType::Uint32:
+        {
+            out << value.GetUint32();
+            break;
+        }
+        case NYdb::EPrimitiveType::Uint64:
+        {
+            out << value.GetUint64();
+            break;
+        }
+        case NYdb::EPrimitiveType::Int64:
+        {
+            out << value.GetInt64();
+            break;
+        }
+        case NYdb::EPrimitiveType::Utf8:
+        {
+            out << value.GetUtf8();
+            break;
+        }
+        case NYdb::EPrimitiveType::Timestamp:
+        {
+            out << value.GetTimestamp();
+            break;
+        }
+        case NYdb::EPrimitiveType::Bool:
+        {
+            out << value.GetBool();
+            break;
+        }
+        default: // any other type fails the test
+        {
+            UNIT_ASSERT_C(false, "PrintValue not implemented for this type");
+        }
+    }
+}
+
+ui64 GetUint32(const NYdb::TValue& v) { // Uint32 payload of v, looking through one Optional level
+    NYdb::TValueParser value(v);
+    if (value.GetKind() == NYdb::TTypeParser::ETypeKind::Optional) {
+        return *value.GetOptionalUint32(); // NOTE(review): dereferenced without a null check - confirm callers never pass NULL
+    } else {
+        return value.GetUint32();
+    }
+}
+
+ui64 GetUint64(const NYdb::TValue& v) { // Uint64 payload of v, looking through one Optional level
+    NYdb::TValueParser value(v);
+    if (value.GetKind() == NYdb::TTypeParser::ETypeKind::Optional) {
+        return *value.GetOptionalUint64(); // NOTE(review): dereferenced without a null check - confirm callers never pass NULL
+    } else {
+        return value.GetUint64();
+    }
+}
+
+TString GetUtf8(const NYdb::TValue& v) { // Utf8 payload of v, looking through one Optional level
+    NYdb::TValueParser value(v);
+    if (value.GetKind() == NYdb::TTypeParser::ETypeKind::Optional) {
+        return *value.GetOptionalUtf8(); // NOTE(review): dereferenced without a null check - confirm callers never pass NULL
+    } else {
+        return value.GetUtf8();
+    }
+}
+
+TInstant GetTimestamp(const NYdb::TValue& v) { // Timestamp payload of v, looking through one Optional level
+    NYdb::TValueParser value(v);
+    if (value.GetKind() == NYdb::TTypeParser::ETypeKind::Optional) {
+        return *value.GetOptionalTimestamp(); // NOTE(review): dereferenced without a null check - confirm callers never pass NULL
+    } else {
+        return value.GetTimestamp();
+    }
+}
+
+void PrintRow(IOutputStream& out, const THashMap& fields) { // prints each field as "name: value "
+    for (const auto& f : fields) {
+        out << f.first << ": ";
+        PrintValue(out, f.second);
+        out << " ";
+    }
+} + +void PrintRows(IOutputStream& out, const TVector>& rows) { + for (const auto& r : rows) { + PrintRow(out, r); + out << "\n"; + } +} + +} \ No newline at end of file diff --git a/ydb/core/kqp/ut/olap/helpers/get_value.h b/ydb/core/kqp/ut/olap/helpers/get_value.h new file mode 100644 index 000000000000..7902b816caef --- /dev/null +++ b/ydb/core/kqp/ut/olap/helpers/get_value.h @@ -0,0 +1,15 @@ +#pragma once +#include + +namespace NKikimr::NKqp { + +void PrintValue(IOutputStream& out, const NYdb::TValue& v); +void PrintRow(IOutputStream& out, const THashMap& fields); +void PrintRows(IOutputStream& out, const TVector>& rows); + +ui64 GetUint32(const NYdb::TValue& v); +ui64 GetUint64(const NYdb::TValue& v); +TString GetUtf8(const NYdb::TValue& v); +TInstant GetTimestamp(const NYdb::TValue& v); + +} \ No newline at end of file diff --git a/ydb/core/kqp/ut/olap/helpers/local.cpp b/ydb/core/kqp/ut/olap/helpers/local.cpp new file mode 100644 index 000000000000..373b62027e62 --- /dev/null +++ b/ydb/core/kqp/ut/olap/helpers/local.cpp @@ -0,0 +1,22 @@ +#include "local.h" + +namespace NKikimr::NKqp { + +void TTableWithNullsHelper::CreateTableWithNulls(TString tableName /*= "tableWithNulls"*/, ui32 shardsCount /*= 4*/) { + TActorId sender = Server.GetRuntime()->AllocateEdgeActor(); + + TBase::CreateTestOlapTable(sender, "", Sprintf(R"( + Name: "%s" + ColumnShardCount: %d + Schema { + %s + } + Sharding { + HashSharding { + Function: HASH_FUNCTION_CONSISTENCY_64 + Columns: "id" + } + })", tableName.c_str(), shardsCount, PROTO_SCHEMA)); +} + +} \ No newline at end of file diff --git a/ydb/core/kqp/ut/olap/helpers/local.h b/ydb/core/kqp/ut/olap/helpers/local.h new file mode 100644 index 000000000000..dc957f98220e --- /dev/null +++ b/ydb/core/kqp/ut/olap/helpers/local.h @@ -0,0 +1,41 @@ +#pragma once +#include +#include + +namespace NKikimr::NKqp { + +class TTableWithNullsHelper: public Tests::NCS::TTableWithNullsHelper { +private: + using TBase = 
Tests::NCS::TTableWithNullsHelper; +public: + using TBase::TBase; + + TTableWithNullsHelper(TKikimrRunner& runner) + : TBase(runner.GetTestServer()) { + } + + void CreateTableWithNulls(TString tableName = "tableWithNulls", ui32 shardsCount = 4); +}; + +class TLocalHelper: public Tests::NCS::THelper { +private: + using TBase = Tests::NCS::THelper; +public: + TLocalHelper& SetShardingMethod(const TString& value) { + TBase::SetShardingMethod(value); + return *this; + } + + void CreateTestOlapTable(TString tableName = "olapTable", TString storeName = "olapStore", + ui32 storeShardsCount = 4, ui32 tableShardsCount = 3) { + CreateOlapTableWithStore(tableName, storeName, storeShardsCount, tableShardsCount); + } + using TBase::TBase; + + TLocalHelper(TKikimrRunner& runner) + : TBase(runner.GetTestServer()) { + + } +}; + +} \ No newline at end of file diff --git a/ydb/core/kqp/ut/olap/helpers/query_executor.cpp b/ydb/core/kqp/ut/olap/helpers/query_executor.cpp new file mode 100644 index 000000000000..11ef90591b5e --- /dev/null +++ b/ydb/core/kqp/ut/olap/helpers/query_executor.cpp @@ -0,0 +1,73 @@ +#include "query_executor.h" +#include "get_value.h" +#include +#include + +namespace NKikimr::NKqp { + +TVector> CollectRows(NYdb::NTable::TScanQueryPartIterator& it, NJson::TJsonValue* statInfo /*= nullptr*/, NJson::TJsonValue* diagnostics /*= nullptr*/) { + TVector> rows; + if (statInfo) { + *statInfo = NJson::JSON_NULL; + } + if (diagnostics) { + *diagnostics = NJson::JSON_NULL; + } + for (;;) { + auto streamPart = it.ReadNext().GetValueSync(); + if (!streamPart.IsSuccess()) { + UNIT_ASSERT_C(streamPart.EOS(), streamPart.GetIssues().ToString()); + break; + } + + UNIT_ASSERT_C(streamPart.HasResultSet() || streamPart.HasQueryStats(), + "Unexpected empty scan query response."); + + if (streamPart.HasQueryStats()) { + auto plan = streamPart.GetQueryStats().GetPlan(); + if (plan && statInfo) { + UNIT_ASSERT(NJson::ReadJsonFastTree(*plan, statInfo)); + } + } + + if 
(streamPart.HasDiagnostics()) { + TString diagnosticsString = streamPart.GetDiagnostics(); + if (!diagnosticsString.empty() && diagnostics) { + UNIT_ASSERT(NJson::ReadJsonFastTree(diagnosticsString, diagnostics)); + } + } + + if (streamPart.HasResultSet()) { + auto resultSet = streamPart.ExtractResultSet(); + NYdb::TResultSetParser rsParser(resultSet); + while (rsParser.TryNextRow()) { + THashMap row; + for (size_t ci = 0; ci < resultSet.ColumnsCount(); ++ci) { + row.emplace(resultSet.GetColumnsMeta()[ci].Name, rsParser.GetValue(ci)); + } + rows.emplace_back(std::move(row)); + } + } + } + return rows; +} + +TVector> ExecuteScanQuery(NYdb::NTable::TTableClient& tableClient, const TString& query, const bool verbose /*= true*/) { + if (verbose) { + Cerr << "====================================\n" + << "QUERY:\n" << query + << "\n\nRESULT:\n"; + } + + NYdb::NTable::TStreamExecScanQuerySettings scanSettings; + auto it = tableClient.StreamExecuteScanQuery(query, scanSettings).GetValueSync(); + auto rows = CollectRows(it); + if (verbose) { + PrintRows(Cerr, rows); + Cerr << "\n"; + } + + return rows; +} + +} \ No newline at end of file diff --git a/ydb/core/kqp/ut/olap/helpers/query_executor.h b/ydb/core/kqp/ut/olap/helpers/query_executor.h new file mode 100644 index 000000000000..18dad7f72f34 --- /dev/null +++ b/ydb/core/kqp/ut/olap/helpers/query_executor.h @@ -0,0 +1,12 @@ +#pragma once +#include +#include +#include +#include + +namespace NKikimr::NKqp { + +TVector> CollectRows(NYdb::NTable::TScanQueryPartIterator& it, NJson::TJsonValue* statInfo = nullptr, NJson::TJsonValue* diagnostics = nullptr); +TVector> ExecuteScanQuery(NYdb::NTable::TTableClient& tableClient, const TString& query, const bool verbose = true); + +} \ No newline at end of file diff --git a/ydb/core/kqp/ut/olap/helpers/typed_local.cpp b/ydb/core/kqp/ut/olap/helpers/typed_local.cpp new file mode 100644 index 000000000000..e592ed398d2b --- /dev/null +++ b/ydb/core/kqp/ut/olap/helpers/typed_local.cpp @@ 
-0,0 +1,161 @@ +#include "typed_local.h" +#include "query_executor.h" +#include "get_value.h" + +namespace NKikimr::NKqp { + +TString TTypedLocalHelper::GetTestTableSchema() const { + TString result; + if (TypeName) { + result = R"(Columns { Name: "field" Type: ")" + TypeName + "\"}"; + } + result += R"( + Columns { Name: "pk_int" Type: "Int64" NotNull: true } + Columns { Name: "ts" Type: "Timestamp" } + KeyColumnNames: "pk_int" + Engine: COLUMN_ENGINE_REPLACING_TIMESERIES + )"; + return result; +} + +void TTypedLocalHelper::ExecuteSchemeQuery(const TString& alterQuery, const NYdb::EStatus expectedStatus /*= EStatus::SUCCESS*/) const { + auto session = KikimrRunner.GetTableClient().CreateSession().GetValueSync().GetSession(); + auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(alterResult.GetStatus(), expectedStatus, alterResult.GetIssues().ToString()); +} + +TString TTypedLocalHelper::GetQueryResult(const TString& request) const { + auto db = KikimrRunner.GetQueryClient(); + auto result = db.ExecuteQuery(request, NYdb::NQuery::TTxControl::BeginTx().CommitTx()).ExtractValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(result.GetStatus(), NYdb::EStatus::SUCCESS, result.GetIssues().ToString()); + const TString output = FormatResultSetYson(result.GetResultSet(0)); + Cout << output << Endl; + return output; +} + +void TTypedLocalHelper::PrintCount() { + const TString selectQuery = "SELECT COUNT(*), MAX(pk_int), MIN(pk_int) FROM `" + TablePath + "`"; + + auto tableClient = KikimrRunner.GetTableClient(); + auto rows = ExecuteScanQuery(tableClient, selectQuery); + for (auto&& r : rows) { + for (auto&& c : r) { + Cerr << c.first << ":" << Endl << c.second.GetProto().DebugString() << Endl; + } + } +} + +NKikimr::NKqp::TTypedLocalHelper::TDistribution TTypedLocalHelper::GetDistribution(const bool verbose /*= false*/) { + const TString selectQuery = "PRAGMA Kikimr.OptUseFinalizeByKey='true';SELECT COUNT(*) as c, field FROM `" + 
TablePath + "` GROUP BY field ORDER BY field"; + + auto tableClient = KikimrRunner.GetTableClient(); + auto rows = ExecuteScanQuery(tableClient, selectQuery, verbose); + ui32 count = 0; + std::optional minCount; + std::optional maxCount; + std::set groups; + for (auto&& r : rows) { + for (auto&& c : r) { + if (c.first == "c") { + const ui64 v = GetUint64(c.second); + count += v; + if (!minCount || *minCount > v) { + minCount = v; + } + if (!maxCount || *maxCount < v) { + maxCount = v; + } + } else if (c.first == "field") { + Y_ABORT_UNLESS(groups.emplace(c.second.GetProto().DebugString()).second); + } + if (verbose) { + Cerr << c.first << ":" << Endl << c.second.GetProto().DebugString() << Endl; + } + } + } + Y_ABORT_UNLESS(maxCount); + Y_ABORT_UNLESS(minCount); + return TDistribution(count, *minCount, *maxCount, groups.size()); +} + +void TTypedLocalHelper::GetVolumes(ui64& rawBytes, ui64& bytes, const bool verbose /*= false*/, const std::vector columnNames /*= {}*/) { + TString selectQuery = "SELECT * FROM `" + TablePath + "/.sys/primary_index_stats` WHERE Activity = true"; + if (columnNames.size()) { + selectQuery += " AND EntityName IN ('" + JoinSeq("','", columnNames) + "')"; + } + + auto tableClient = KikimrRunner.GetTableClient(); + + std::optional rawBytesPred; + std::optional bytesPred; + while (true) { + auto rows = ExecuteScanQuery(tableClient, selectQuery); + rawBytes = 0; + bytes = 0; + for (auto&& r : rows) { + if (verbose) { + Cerr << "-------" << Endl; + } + for (auto&& c : r) { + if (c.first == "RawBytes") { + rawBytes += GetUint64(c.second); + } + if (c.first == "BlobRangeSize") { + bytes += GetUint64(c.second); + } + if (verbose) { + Cerr << c.first << ":" << Endl << c.second.GetProto().DebugString() << Endl; + } + } + } + if (rawBytesPred && *rawBytesPred == rawBytes && bytesPred && *bytesPred == bytes) { + break; + } else { + rawBytesPred = rawBytes; + bytesPred = bytes; + Cerr << "Wait changes: " << bytes << "/" << rawBytes << Endl; + 
Sleep(TDuration::Seconds(5)); + } + } + Cerr << bytes << "/" << rawBytes << Endl; +} + +void TTypedLocalHelper::GetCount(ui64& count) { + const TString selectQuery = "SELECT COUNT(*) as a FROM `" + TablePath + "`"; + + auto tableClient = KikimrRunner.GetTableClient(); + + auto rows = ExecuteScanQuery(tableClient, selectQuery); + for (auto&& r : rows) { + for (auto&& c : r) { + if (c.first == "a") { + count = GetUint64(c.second); + } + } + } +} + +void TTypedLocalHelper::FillPKOnly(const double pkKff /*= 0*/, const ui32 numRows /*= 800000*/) const { + std::vector builders; + builders.emplace_back(NArrow::NConstruction::TSimpleArrayConstructor>::BuildNotNullable("pk_int", numRows * pkKff)); + NArrow::NConstruction::TRecordBatchConstructor batchBuilder(builders); + std::shared_ptr batch = batchBuilder.BuildBatch(numRows); + TBase::SendDataViaActorSystem(TablePath, batch); +} + +void TTypedLocalHelper::GetStats(std::vector& stats, const bool verbose /*= false*/) { + TString selectQuery = "SELECT * FROM `" + TablePath + "/.sys/primary_index_portion_stats` WHERE Activity = true"; + auto tableClient = KikimrRunner.GetTableClient(); + auto rows = ExecuteScanQuery(tableClient, selectQuery, verbose); + for (auto&& r : rows) { + for (auto&& c : r) { + if (c.first == "Stats") { + NKikimrColumnShardStatisticsProto::TPortionStorage store; + AFL_VERIFY(google::protobuf::TextFormat::ParseFromString(GetUtf8(c.second), &store)); + stats.emplace_back(store); + } + } + } +} + +} \ No newline at end of file diff --git a/ydb/core/kqp/ut/olap/helpers/typed_local.h b/ydb/core/kqp/ut/olap/helpers/typed_local.h new file mode 100644 index 000000000000..1afef6b7a19b --- /dev/null +++ b/ydb/core/kqp/ut/olap/helpers/typed_local.h @@ -0,0 +1,90 @@ +#pragma once +#include +#include +#include +#include +#include +#include + +namespace NKikimr::NKqp { + +class TTypedLocalHelper: public Tests::NCS::THelper { +private: + using TBase = Tests::NCS::THelper; + const TString TypeName; + TKikimrRunner& 
KikimrRunner; + const TString TablePath; + const TString TableName; + const TString StoreName; +protected: + virtual TString GetTestTableSchema() const override; + virtual std::vector GetShardingColumns() const override { + return {"pk_int"}; + } +public: + TTypedLocalHelper(const TString& typeName, TKikimrRunner& kikimrRunner, const TString& tableName = "olapTable", const TString& storeName = "olapStore") + : TBase(kikimrRunner.GetTestServer()) + , TypeName(typeName) + , KikimrRunner(kikimrRunner) + , TablePath("/Root/" + storeName + "/" + tableName) + , TableName(tableName) + , StoreName(storeName) { + SetShardingMethod("HASH_FUNCTION_CONSISTENCY_64"); + } + + void ExecuteSchemeQuery(const TString& alterQuery, const NYdb::EStatus expectedStatus = NYdb::EStatus::SUCCESS) const; + + TString GetQueryResult(const TString& request) const; + + void PrintCount(); + + class TDistribution { + private: + YDB_READONLY(ui32, Count, 0); + YDB_READONLY(ui32, MinCount, 0); + YDB_READONLY(ui32, MaxCount, 0); + YDB_READONLY(ui32, GroupsCount, 0); + public: + TDistribution(const ui32 count, const ui32 minCount, const ui32 maxCount, const ui32 groupsCount) + : Count(count) + , MinCount(minCount) + , MaxCount(maxCount) + , GroupsCount(groupsCount) { + + } + + TString DebugString() const { + return TStringBuilder() + << "count=" << Count << ";" + << "min_count=" << MinCount << ";" + << "max_count=" << MaxCount << ";" + << "groups_count=" << GroupsCount << ";"; + } + }; + + TDistribution GetDistribution(const bool verbose = false); + + void GetVolumes(ui64& rawBytes, ui64& bytes, const bool verbose = false, const std::vector columnNames = {}); + + void GetStats(std::vector& stats, const bool verbose = false); + + void GetCount(ui64& count); + + template + void FillTable(const TFiller& fillPolicy, const ui32 pkKff = 0, const ui32 numRows = 800000) const { + std::vector builders; + builders.emplace_back(NArrow::NConstruction::TSimpleArrayConstructor>::BuildNotNullable("pk_int", numRows 
* pkKff)); + builders.emplace_back(std::make_shared>("field", fillPolicy)); + NArrow::NConstruction::TRecordBatchConstructor batchBuilder(builders); + std::shared_ptr batch = batchBuilder.BuildBatch(numRows); + TBase::SendDataViaActorSystem(TablePath, batch); + } + + void FillPKOnly(const double pkKff = 0, const ui32 numRows = 800000) const; + + void CreateTestOlapTable(ui32 storeShardsCount = 4, ui32 tableShardsCount = 3) { + CreateOlapTableWithStore(TableName, StoreName, storeShardsCount, tableShardsCount); + } +}; + +} \ No newline at end of file diff --git a/ydb/core/kqp/ut/olap/helpers/writer.cpp b/ydb/core/kqp/ut/olap/helpers/writer.cpp new file mode 100644 index 000000000000..e82645f5e988 --- /dev/null +++ b/ydb/core/kqp/ut/olap/helpers/writer.cpp @@ -0,0 +1,16 @@ +#include "writer.h" +#include "local.h" + +namespace NKikimr::NKqp { + +void WriteTestData(TKikimrRunner& kikimr, TString testTable, ui64 pathIdBegin, ui64 tsBegin, size_t rowCount, bool withSomeNulls /*= false*/) { + UNIT_ASSERT(testTable != "/Root/benchTable"); // TODO: check schema instead + TLocalHelper lHelper(kikimr); + if (withSomeNulls) { + lHelper.WithSomeNulls(); + } + auto batch = lHelper.TestArrowBatch(pathIdBegin, tsBegin, rowCount); + lHelper.SendDataViaActorSystem(testTable, batch); +} + +} \ No newline at end of file diff --git a/ydb/core/kqp/ut/olap/helpers/writer.h b/ydb/core/kqp/ut/olap/helpers/writer.h new file mode 100644 index 000000000000..acaf73d151d4 --- /dev/null +++ b/ydb/core/kqp/ut/olap/helpers/writer.h @@ -0,0 +1,8 @@ +#pragma once +#include + +namespace NKikimr::NKqp { + +void WriteTestData(TKikimrRunner& kikimr, TString testTable, ui64 pathIdBegin, ui64 tsBegin, size_t rowCount, bool withSomeNulls = false); + +} \ No newline at end of file diff --git a/ydb/core/kqp/ut/olap/helpers/ya.make b/ydb/core/kqp/ut/olap/helpers/ya.make new file mode 100644 index 000000000000..33fc652a32db --- /dev/null +++ b/ydb/core/kqp/ut/olap/helpers/ya.make @@ -0,0 +1,18 @@ +LIBRARY() + 
+SRCS( + local.cpp + query_executor.cpp + typed_local.cpp + writer.cpp + get_value.cpp + aggregation.cpp +) + +PEERDIR( + ydb/core/testlib +) + +YQL_LAST_ABI_VERSION() + +END() diff --git a/ydb/core/kqp/ut/olap/indexes_ut.cpp b/ydb/core/kqp/ut/olap/indexes_ut.cpp new file mode 100644 index 000000000000..5db56eb19848 --- /dev/null +++ b/ydb/core/kqp/ut/olap/indexes_ut.cpp @@ -0,0 +1,367 @@ +#include "helpers/local.h" +#include "helpers/writer.h" + +#include +#include +#include +#include + +#include + +namespace NKikimr::NKqp { + +Y_UNIT_TEST_SUITE(KqpOlapIndexes) { + Y_UNIT_TEST(IndexesActualization) { + auto settings = TKikimrSettings() + .SetWithSampleTables(false); + TKikimrRunner kikimr(settings); + + auto csController = NYDBTest::TControllers::RegisterCSControllerGuard(); + csController->SetPeriodicWakeupActivationPeriod(TDuration::Seconds(1)); + csController->SetLagForCompactionBeforeTierings(TDuration::Seconds(1)); + + TLocalHelper(kikimr).CreateTestOlapTable(); + auto tableClient = kikimr.GetTableClient(); + + Tests::NCommon::TLoggerInit(kikimr).SetComponents({NKikimrServices::TX_COLUMNSHARD}, "CS").SetPriority(NActors::NLog::PRI_DEBUG).Initialize(); + + std::vector uids; + std::vector resourceIds; + std::vector levels; + + { + WriteTestData(kikimr, "/Root/olapStore/olapTable", 1000000, 300000000, 10000); + WriteTestData(kikimr, "/Root/olapStore/olapTable", 1100000, 300100000, 10000); + WriteTestData(kikimr, "/Root/olapStore/olapTable", 1200000, 300200000, 10000); + WriteTestData(kikimr, "/Root/olapStore/olapTable", 1300000, 300300000, 10000); + WriteTestData(kikimr, "/Root/olapStore/olapTable", 1400000, 300400000, 10000); + WriteTestData(kikimr, "/Root/olapStore/olapTable", 2000000, 200000000, 70000); + WriteTestData(kikimr, "/Root/olapStore/olapTable", 3000000, 100000000, 110000); + + const auto filler = [&](const ui32 startRes, const ui32 startUid, const ui32 count) { + for (ui32 i = 0; i < count; ++i) { + uids.emplace_back("uid_" + ::ToString(startUid + 
i)); + resourceIds.emplace_back(::ToString(startRes + i)); + levels.emplace_back(i % 5); + } + }; + + filler(1000000, 300000000, 10000); + filler(1100000, 300100000, 10000); + filler(1200000, 300200000, 10000); + filler(1300000, 300300000, 10000); + filler(1400000, 300400000, 10000); + filler(2000000, 200000000, 70000); + filler(3000000, 100000000, 110000); + + } + + { + auto alterQuery = TStringBuilder() << + R"(ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_INDEX, NAME=index_uid, TYPE=BLOOM_FILTER, + FEATURES=`{"column_names" : ["uid"], "false_positive_probability" : 0.05}`); + )"; + auto session = tableClient.CreateSession().GetValueSync().GetSession(); + auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(alterResult.GetStatus(), NYdb::EStatus::SUCCESS, alterResult.GetIssues().ToString()); + } + { + auto alterQuery = TStringBuilder() << + R"(ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_INDEX, NAME=index_resource_id, TYPE=BLOOM_FILTER, + FEATURES=`{"column_names" : ["resource_id", "level"], "false_positive_probability" : 0.05}`); + )"; + auto session = tableClient.CreateSession().GetValueSync().GetSession(); + auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(alterResult.GetStatus(), NYdb::EStatus::SUCCESS, alterResult.GetIssues().ToString()); + } + + { + auto alterQuery = TStringBuilder() << + "ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_OPTIONS, SCHEME_NEED_ACTUALIZATION=`true`);"; + auto session = tableClient.CreateSession().GetValueSync().GetSession(); + auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(alterResult.GetStatus(), NYdb::EStatus::SUCCESS, alterResult.GetIssues().ToString()); + } + csController->WaitActualization(TDuration::Seconds(10)); + { + auto it = tableClient.StreamExecuteScanQuery(R"( + --!syntax_v1 + + SELECT + 
COUNT(*) + FROM `/Root/olapStore/olapTable` + WHERE ((resource_id = '2' AND level = 222222) OR (resource_id = '1' AND level = 111111) OR (resource_id LIKE '%11dd%')) AND uid = '222' + )").GetValueSync(); + + UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); + TString result = StreamResultToYson(it); + Cerr << result << Endl; + Cerr << csController->GetIndexesSkippingOnSelect().Val() << " / " << csController->GetIndexesApprovedOnSelect().Val() << Endl; + CompareYson(result, R"([[0u;]])"); + AFL_VERIFY(csController->GetIndexesSkippedNoData().Val() == 0); + AFL_VERIFY(csController->GetIndexesApprovedOnSelect().Val() < csController->GetIndexesSkippingOnSelect().Val() * 0.4) + ("approve", csController->GetIndexesApprovedOnSelect().Val())("skip", csController->GetIndexesSkippingOnSelect().Val()); + } + } + + Y_UNIT_TEST(IndexesActualizationRebuildScheme) { + auto settings = TKikimrSettings() + .SetWithSampleTables(false); + TKikimrRunner kikimr(settings); + + auto csController = NYDBTest::TControllers::RegisterCSControllerGuard(); + csController->SetPeriodicWakeupActivationPeriod(TDuration::Seconds(1)); + + TLocalHelper(kikimr).CreateTestOlapTable(); + auto tableClient = kikimr.GetTableClient(); + + std::vector uids; + std::vector resourceIds; + std::vector levels; + + { + WriteTestData(kikimr, "/Root/olapStore/olapTable", 1000000, 300000000, 10000); + WriteTestData(kikimr, "/Root/olapStore/olapTable", 1100000, 300100000, 10000); + + const auto filler = [&](const ui32 startRes, const ui32 startUid, const ui32 count) { + for (ui32 i = 0; i < count; ++i) { + uids.emplace_back("uid_" + ::ToString(startUid + i)); + resourceIds.emplace_back(::ToString(startRes + i)); + levels.emplace_back(i % 5); + } + }; + + filler(1000000, 300000000, 10000); + filler(1100000, 300100000, 10000); + + } + + for (ui32 i = 0; i < 10; ++i) { + auto alterQuery = TStringBuilder() << + "ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_OPTIONS, 
SCHEME_NEED_ACTUALIZATION=`true`);"; + auto session = tableClient.CreateSession().GetValueSync().GetSession(); + auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(alterResult.GetStatus(), NYdb::EStatus::SUCCESS, alterResult.GetIssues().ToString()); + } + const ui64 startCount = csController->GetActualizationRefreshSchemeCount().Val(); + AFL_VERIFY(startCount == 30); + + for (auto&& i : csController->GetShardActualIds()) { + kikimr.GetTestServer().GetRuntime()->Send(MakePipePeNodeCacheID(false), NActors::TActorId(), new TEvPipeCache::TEvForward( + new TEvents::TEvPoisonPill(), i, false)); + } + + { + auto it = tableClient.StreamExecuteScanQuery(R"( + --!syntax_v1 + SELECT + COUNT(*) + FROM `/Root/olapStore/olapTable` + )").GetValueSync(); + UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); + TString result = StreamResultToYson(it); + Cout << result << Endl; + CompareYson(result, R"([[20000u;]])"); + } + + AFL_VERIFY(startCount + 3 /*tables count*/ * 2 /*normalizers + main_load*/ == + (ui64)csController->GetActualizationRefreshSchemeCount().Val())("start", startCount)("count", csController->GetActualizationRefreshSchemeCount().Val()); + } + + Y_UNIT_TEST(Indexes) { + auto settings = TKikimrSettings() + .SetWithSampleTables(false); + TKikimrRunner kikimr(settings); + + TLocalHelper(kikimr).CreateTestOlapTable(); + auto tableClient = kikimr.GetTableClient(); + +// Tests::NCommon::TLoggerInit(kikimr).Initialize(); + + auto csController = NYDBTest::TControllers::RegisterCSControllerGuard(); + + { + auto alterQuery = TStringBuilder() << + R"(ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_INDEX, NAME=index_uid, TYPE=BLOOM_FILTER, + FEATURES=`{"column_names" : ["uid"], "false_positive_probability" : 0.05}`); + )"; + auto session = tableClient.CreateSession().GetValueSync().GetSession(); + auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); + 
UNIT_ASSERT_VALUES_EQUAL_C(alterResult.GetStatus(), NYdb::EStatus::SUCCESS, alterResult.GetIssues().ToString()); + } + { + auto alterQuery = TStringBuilder() << + R"(ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_INDEX, NAME=index_resource_id, TYPE=BLOOM_FILTER, + FEATURES=`{"column_names" : ["resource_id", "level"], "false_positive_probability" : 0.05}`); + )"; + auto session = tableClient.CreateSession().GetValueSync().GetSession(); + auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(alterResult.GetStatus(), NYdb::EStatus::SUCCESS, alterResult.GetIssues().ToString()); + } + + std::vector uids; + std::vector resourceIds; + std::vector levels; + + { + WriteTestData(kikimr, "/Root/olapStore/olapTable", 1000000, 300000000, 10000); + WriteTestData(kikimr, "/Root/olapStore/olapTable", 1100000, 300100000, 10000); + WriteTestData(kikimr, "/Root/olapStore/olapTable", 1200000, 300200000, 10000); + WriteTestData(kikimr, "/Root/olapStore/olapTable", 1300000, 300300000, 10000); + WriteTestData(kikimr, "/Root/olapStore/olapTable", 1400000, 300400000, 10000); + WriteTestData(kikimr, "/Root/olapStore/olapTable", 2000000, 200000000, 70000); + WriteTestData(kikimr, "/Root/olapStore/olapTable", 3000000, 100000000, 110000); + + const auto filler = [&](const ui32 startRes, const ui32 startUid, const ui32 count) { + for (ui32 i = 0; i < count; ++i) { + uids.emplace_back("uid_" + ::ToString(startUid + i)); + resourceIds.emplace_back(::ToString(startRes + i)); + levels.emplace_back(i % 5); + } + }; + + filler(1000000, 300000000, 10000); + filler(1100000, 300100000, 10000); + filler(1200000, 300200000, 10000); + filler(1300000, 300300000, 10000); + filler(1400000, 300400000, 10000); + filler(2000000, 200000000, 70000); + filler(3000000, 100000000, 110000); + + } + + { + auto it = tableClient.StreamExecuteScanQuery(R"( + --!syntax_v1 + + SELECT + COUNT(*) + FROM `/Root/olapStore/olapTable` + )").GetValueSync(); + + 
UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); + TString result = StreamResultToYson(it); + Cout << result << Endl; + CompareYson(result, R"([[230000u;]])"); + } + + AFL_VERIFY(csController->GetIndexesSkippingOnSelect().Val() == 0); + AFL_VERIFY(csController->GetIndexesApprovedOnSelect().Val() == 0); + TInstant start = Now(); + ui32 compactionsStart = csController->GetCompactionStartedCounter().Val(); + while (Now() - start < TDuration::Seconds(10)) { + if (compactionsStart != csController->GetCompactionStartedCounter().Val()) { + compactionsStart = csController->GetCompactionStartedCounter().Val(); + start = Now(); + } + Cerr << "WAIT_COMPACTION: " << csController->GetCompactionStartedCounter().Val() << Endl; + Sleep(TDuration::Seconds(1)); + } + + { + auto it = tableClient.StreamExecuteScanQuery(R"( + --!syntax_v1 + + SELECT + COUNT(*) + FROM `/Root/olapStore/olapTable` + WHERE ((resource_id = '2' AND level = 222222) OR (resource_id = '1' AND level = 111111) OR (resource_id LIKE '%11dd%')) AND uid = '222' + )").GetValueSync(); + + UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); + TString result = StreamResultToYson(it); + Cout << result << Endl; + Cout << csController->GetIndexesSkippingOnSelect().Val() << " / " << csController->GetIndexesApprovedOnSelect().Val() << Endl; + CompareYson(result, R"([[0u;]])"); + AFL_VERIFY(csController->GetIndexesSkippedNoData().Val() == 0); + AFL_VERIFY(csController->GetIndexesApprovedOnSelect().Val() < csController->GetIndexesSkippingOnSelect().Val() * 0.3); + } + ui32 requestsCount = 100; + for (ui32 i = 0; i < requestsCount; ++i) { + const ui32 idx = RandomNumber(uids.size()); + const auto query = [](const TString& res, const TString& uid, const ui32 level) { + TStringBuilder sb; + sb << "SELECT" << Endl; + sb << "COUNT(*)" << Endl; + sb << "FROM `/Root/olapStore/olapTable`" << Endl; + sb << "WHERE(" << Endl; + sb << "resource_id = '" << res << "' AND" << Endl; + sb << "uid= '" << uid << "' AND" << Endl; + 
sb << "level= " << level << Endl; + sb << ")"; + return sb; + }; + auto it = tableClient.StreamExecuteScanQuery(query(resourceIds[idx], uids[idx], levels[idx])).GetValueSync(); + + UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); + TString result = StreamResultToYson(it); + Cout << csController->GetIndexesSkippingOnSelect().Val() << " / " << csController->GetIndexesApprovedOnSelect().Val() << " / " << csController->GetIndexesSkippedNoData().Val() << Endl; + CompareYson(result, R"([[1u;]])"); + } + + AFL_VERIFY(csController->GetIndexesApprovedOnSelect().Val() < 0.20 * csController->GetIndexesSkippingOnSelect().Val()); + + } + + Y_UNIT_TEST(IndexesModificationError) { + auto settings = TKikimrSettings().SetWithSampleTables(false); + TKikimrRunner kikimr(settings); + + TLocalHelper(kikimr).CreateTestOlapTable(); + auto tableClient = kikimr.GetTableClient(); + + // Tests::NCommon::TLoggerInit(kikimr).Initialize(); + + auto csController = NYDBTest::TControllers::RegisterCSControllerGuard(); + + { + auto alterQuery = TStringBuilder() << + R"(ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_INDEX, NAME=index_uid, TYPE=BLOOM_FILTER, + FEATURES=`{"column_names" : ["uid"], "false_positive_probability" : 0.05}`); + )"; + auto session = tableClient.CreateSession().GetValueSync().GetSession(); + auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(alterResult.GetStatus(), NYdb::EStatus::SUCCESS, alterResult.GetIssues().ToString()); + } + + { + auto alterQuery = TStringBuilder() << + R"(ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_INDEX, NAME=index_uid, TYPE=BLOOM_FILTER, + FEATURES=`{"column_names" : ["uid", "resource_id"], "false_positive_probability" : 0.05}`); + )"; + auto session = tableClient.CreateSession().GetValueSync().GetSession(); + auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); + UNIT_ASSERT_VALUES_UNEQUAL(alterResult.GetStatus(), 
NYdb::EStatus::SUCCESS); + } + + { + auto alterQuery = TStringBuilder() << + R"(ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_INDEX, NAME=index_uid, TYPE=BLOOM_FILTER, + FEATURES=`{"column_names" : ["uid"], "false_positive_probability" : 0.005}`); + )"; + auto session = tableClient.CreateSession().GetValueSync().GetSession(); + auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); + UNIT_ASSERT_VALUES_UNEQUAL(alterResult.GetStatus(), NYdb::EStatus::SUCCESS); + } + + { + auto alterQuery = TStringBuilder() << + R"(ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_INDEX, NAME=index_uid, TYPE=BLOOM_FILTER, + FEATURES=`{"column_names" : ["uid"], "false_positive_probability" : 0.01}`); + )"; + auto session = tableClient.CreateSession().GetValueSync().GetSession(); + auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(alterResult.GetStatus(), NYdb::EStatus::SUCCESS, alterResult.GetIssues().ToString()); + } + + { + auto alterQuery = TStringBuilder() << "ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=DROP_INDEX, NAME=index_uid);"; + auto session = tableClient.CreateSession().GetValueSync().GetSession(); + auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(alterResult.GetStatus(), NYdb::EStatus::SUCCESS, alterResult.GetIssues().ToString()); + } + + } +} + +} diff --git a/ydb/core/kqp/ut/olap/kqp_olap_ut.cpp b/ydb/core/kqp/ut/olap/kqp_olap_ut.cpp index 417ba6527433..a8e14ea51c3e 100644 --- a/ydb/core/kqp/ut/olap/kqp_olap_ut.cpp +++ b/ydb/core/kqp/ut/olap/kqp_olap_ut.cpp @@ -1,423 +1,32 @@ -#include -#include - -#include -#include +#include -#include -#include +#include "helpers/get_value.h" +#include "helpers/query_executor.h" +#include "helpers/local.h" +#include "helpers/writer.h" +#include "helpers/aggregation.h" -#include -#include -#include -#include -#include +#include #include -#include 
+#include +#include +#include #include -#include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -namespace NKikimr { -namespace NKqp { - -using namespace NKikimr::NDataShard::NKqpHelpers; + +#include +#include + +#include + +namespace NKikimr::NKqp { using namespace NSchemeShard; using namespace NActors; using namespace NYdb; using namespace NYdb::NTable; using namespace NYdb::NScheme; -using TEvBulkUpsertRequest = NGRpcService::TGrpcRequestOperationCall; - Y_UNIT_TEST_SUITE(KqpOlap) { - void PrintValue(IOutputStream& out, const NYdb::TValue& v) { - NYdb::TValueParser value(v); - - while (value.GetKind() == NYdb::TTypeParser::ETypeKind::Optional) { - if (value.IsNull()) { - out << ""; - return; - } else { - value.OpenOptional(); - } - } - - if (value.IsNull()) { - out << ""; - return; - } - - switch (value.GetPrimitiveType()) { - case NYdb::EPrimitiveType::Uint32: - { - out << value.GetUint32(); - break; - } - case NYdb::EPrimitiveType::Uint64: - { - out << value.GetUint64(); - break; - } - case NYdb::EPrimitiveType::Int64: - { - out << value.GetInt64(); - break; - } - case NYdb::EPrimitiveType::Utf8: - { - out << value.GetUtf8(); - break; - } - case NYdb::EPrimitiveType::Timestamp: - { - out << value.GetTimestamp(); - break; - } - case NYdb::EPrimitiveType::Bool: - { - out << value.GetBool(); - break; - } - default: - { - UNIT_ASSERT_C(false, "PrintValue not iplemented for this type"); - } - } - } - - void PrintRow(IOutputStream& out, const THashMap& fields) { - for (const auto& f : fields) { - out << f.first << ": "; - PrintValue(out, f.second); - out << " "; - } - } - - void PrintRows(IOutputStream& out, const TVector>& rows) { - for (const auto& r : rows) { - PrintRow(out, r); - out << "\n"; - } - } - - TVector> CollectRows(NYdb::NTable::TScanQueryPartIterator& it, NJson::TJsonValue* statInfo = nullptr, NJson::TJsonValue* diagnostics = nullptr) { - TVector> rows; - if (statInfo) { - 
*statInfo = NJson::JSON_NULL; - } - if (diagnostics) { - *diagnostics = NJson::JSON_NULL; - } - for (;;) { - auto streamPart = it.ReadNext().GetValueSync(); - if (!streamPart.IsSuccess()) { - UNIT_ASSERT_C(streamPart.EOS(), streamPart.GetIssues().ToString()); - break; - } - - UNIT_ASSERT_C(streamPart.HasResultSet() || streamPart.HasQueryStats(), - "Unexpected empty scan query response."); - - if (streamPart.HasQueryStats()) { - auto plan = streamPart.GetQueryStats().GetPlan(); - if (plan && statInfo) { - UNIT_ASSERT(NJson::ReadJsonFastTree(*plan, statInfo)); - } - } - - if (streamPart.HasDiagnostics()) { - TString diagnosticsString = streamPart.GetDiagnostics(); - if (!diagnosticsString.empty() && diagnostics) { - UNIT_ASSERT(NJson::ReadJsonFastTree(diagnosticsString, diagnostics)); - } - } - - if (streamPart.HasResultSet()) { - auto resultSet = streamPart.ExtractResultSet(); - NYdb::TResultSetParser rsParser(resultSet); - while (rsParser.TryNextRow()) { - THashMap row; - for (size_t ci = 0; ci < resultSet.ColumnsCount(); ++ci) { - row.emplace(resultSet.GetColumnsMeta()[ci].Name, rsParser.GetValue(ci)); - } - rows.emplace_back(std::move(row)); - } - } - } - return rows; - } - - TVector> ExecuteScanQuery(NYdb::NTable::TTableClient& tableClient, const TString& query, const bool verbose = true) { - if (verbose) { - Cerr << "====================================\n" - << "QUERY:\n" << query - << "\n\nRESULT:\n"; - } - - TStreamExecScanQuerySettings scanSettings; - auto it = tableClient.StreamExecuteScanQuery(query, scanSettings).GetValueSync(); - auto rows = CollectRows(it); - if (verbose) { - PrintRows(Cerr, rows); - Cerr << "\n"; - } - - return rows; - } - - ui64 GetUint32(const NYdb::TValue& v) { - NYdb::TValueParser value(v); - if (value.GetKind() == NYdb::TTypeParser::ETypeKind::Optional) { - return *value.GetOptionalUint32(); - } else { - return value.GetUint32(); - } - } - - ui64 GetUint64(const NYdb::TValue& v) { - NYdb::TValueParser value(v); - if 
(value.GetKind() == NYdb::TTypeParser::ETypeKind::Optional) { - return *value.GetOptionalUint64(); - } else { - return value.GetUint64(); - } - } - - TString GetUtf8(const NYdb::TValue& v) { - NYdb::TValueParser value(v); - if (value.GetKind() == NYdb::TTypeParser::ETypeKind::Optional) { - return *value.GetOptionalUtf8(); - } else { - return value.GetUtf8(); - } - } - - TInstant GetTimestamp(const NYdb::TValue& v) { - NYdb::TValueParser value(v); - if (value.GetKind() == NYdb::TTypeParser::ETypeKind::Optional) { - return *value.GetOptionalTimestamp(); - } else { - return value.GetTimestamp(); - } - } - - class TTypedLocalHelper: public Tests::NCS::THelper { - private: - using TBase = Tests::NCS::THelper; - const TString TypeName; - TKikimrRunner& KikimrRunner; - const TString TablePath; - const TString TableName; - const TString StoreName; - protected: - virtual TString GetTestTableSchema() const override { - TString result; - if (TypeName) { - result = R"(Columns { Name: "field" Type: ")" + TypeName + "\"}"; - } - result += R"( - Columns { Name: "pk_int" Type: "Int64" } - KeyColumnNames: "pk_int" - Engine: COLUMN_ENGINE_REPLACING_TIMESERIES - )"; - return result; - } - virtual std::vector GetShardingColumns() const override { - return { "pk_int" }; - } - public: - TTypedLocalHelper(const TString& typeName, TKikimrRunner& kikimrRunner, const TString& tableName = "olapTable", const TString& storeName = "olapStore") - : TBase(kikimrRunner.GetTestServer()) - , TypeName(typeName) - , KikimrRunner(kikimrRunner) - , TablePath("/Root/" + storeName + "/" + tableName) - , TableName(tableName) - , StoreName(storeName) - { - SetShardingMethod("HASH_FUNCTION_CONSISTENCY_64"); - } - - void PrintCount() { - const TString selectQuery = "SELECT COUNT(*), MAX(pk_int), MIN(pk_int) FROM `" + TablePath + "`"; - - auto tableClient = KikimrRunner.GetTableClient(); - auto rows = ExecuteScanQuery(tableClient, selectQuery); - for (auto&& r : rows) { - for (auto&& c : r) { - Cerr << c.first 
<< ":" << Endl << c.second.GetProto().DebugString() << Endl; - } - } - } - - class TDistribution { - private: - YDB_READONLY(ui32, Count, 0); - YDB_READONLY(ui32, MinCount, 0); - YDB_READONLY(ui32, MaxCount, 0); - YDB_READONLY(ui32, GroupsCount, 0); - public: - TDistribution(const ui32 count, const ui32 minCount, const ui32 maxCount, const ui32 groupsCount) - : Count(count) - , MinCount(minCount) - , MaxCount(maxCount) - , GroupsCount(groupsCount) - { - - } - - TString DebugString() const { - return TStringBuilder() - << "count=" << Count << ";" - << "min_count=" << MinCount << ";" - << "max_count=" << MaxCount << ";" - << "groups_count=" << GroupsCount << ";"; - } - }; - - TDistribution GetDistribution(const bool verbose = false) { - const TString selectQuery = "PRAGMA Kikimr.OptUseFinalizeByKey='true';SELECT COUNT(*) as c, field FROM `" + TablePath + "` GROUP BY field ORDER BY field"; - - auto tableClient = KikimrRunner.GetTableClient(); - auto rows = ExecuteScanQuery(tableClient, selectQuery, verbose); - ui32 count = 0; - std::optional minCount; - std::optional maxCount; - std::set groups; - for (auto&& r : rows) { - for (auto&& c : r) { - if (c.first == "c") { - const ui64 v = GetUint64(c.second); - count += v; - if (!minCount || *minCount > v) { - minCount = v; - } - if (!maxCount || *maxCount < v) { - maxCount = v; - } - } else if (c.first == "field") { - Y_ABORT_UNLESS(groups.emplace(c.second.GetProto().DebugString()).second); - } - if (verbose) { - Cerr << c.first << ":" << Endl << c.second.GetProto().DebugString() << Endl; - } - } - } - Y_ABORT_UNLESS(maxCount); - Y_ABORT_UNLESS(minCount); - return TDistribution(count, *minCount, *maxCount, groups.size()); - } - - void GetVolumes(ui64& rawBytes, ui64& bytes, const bool verbose = false) { - const TString selectQuery = "SELECT * FROM `" + TablePath + "/.sys/primary_index_stats`"; - - auto tableClient = KikimrRunner.GetTableClient(); - - std::optional rawBytesPred; - std::optional bytesPred; - while (true) { 
- auto rows = ExecuteScanQuery(tableClient, selectQuery); - rawBytes = 0; - bytes = 0; - for (auto&& r : rows) { - if (verbose) { - Cerr << "-------" << Endl; - } - for (auto&& c : r) { - if (c.first == "RawBytes") { - rawBytes += GetUint64(c.second); - } - if (c.first == "BlobRangeSize") { - bytes += GetUint64(c.second); - } - if (verbose) { - Cerr << c.first << ":" << Endl << c.second.GetProto().DebugString() << Endl; - } - } - } - if (rawBytesPred && *rawBytesPred == rawBytes && bytesPred && *bytesPred == bytes) { - break; - } else { - rawBytesPred = rawBytes; - bytesPred = bytes; - Cerr << "Wait changes: " << bytes << "/" << rawBytes << Endl; - Sleep(TDuration::Seconds(5)); - } - } - Cerr << bytes << "/" << rawBytes << Endl; - } - - void GetCount(ui64& count) { - const TString selectQuery = "SELECT COUNT(*) as a FROM `" + TablePath + "`"; - - auto tableClient = KikimrRunner.GetTableClient(); - - auto rows = ExecuteScanQuery(tableClient, selectQuery); - for (auto&& r : rows) { - for (auto&& c : r) { - if (c.first == "a") { - count = GetUint64(c.second); - } - } - } - } - - template - void FillTable(const TFiller& fillPolicy, const ui32 pkKff = 0, const ui32 numRows = 800000) const { - std::vector builders; - builders.emplace_back(std::make_shared>>("pk_int", numRows * pkKff)); - builders.emplace_back(std::make_shared>("field", fillPolicy)); - NArrow::NConstruction::TRecordBatchConstructor batchBuilder(builders); - std::shared_ptr batch = batchBuilder.BuildBatch(numRows); - TBase::SendDataViaActorSystem(TablePath, batch); - } - - void FillPKOnly(const ui32 pkKff = 0, const ui32 numRows = 800000) const { - std::vector builders; - builders.emplace_back(std::make_shared>>("pk_int", numRows * pkKff)); - NArrow::NConstruction::TRecordBatchConstructor batchBuilder(builders); - std::shared_ptr batch = batchBuilder.BuildBatch(numRows); - TBase::SendDataViaActorSystem(TablePath, batch); - } - - void CreateTestOlapTable(ui32 storeShardsCount = 4, ui32 tableShardsCount = 3) 
{ - CreateOlapTableWithStore(TableName, StoreName, storeShardsCount, tableShardsCount); - } - }; - - class TLocalHelper: public Tests::NCS::THelper { - private: - using TBase = Tests::NCS::THelper; - public: - TLocalHelper& SetShardingMethod(const TString& value) { - TBase::SetShardingMethod(value); - return *this; - } - - void CreateTestOlapTable(TString tableName = "olapTable", TString storeName = "olapStore", - ui32 storeShardsCount = 4, ui32 tableShardsCount = 3) { - CreateOlapTableWithStore(tableName, storeName, storeShardsCount, tableShardsCount); - } - using TBase::TBase; - - TLocalHelper(TKikimrRunner& runner) - : TBase(runner.GetTestServer()) { - - } - }; class TExtLocalHelper: public TLocalHelper { private: @@ -474,86 +83,6 @@ Y_UNIT_TEST_SUITE(KqpOlap) { } }; - class TClickHelper : public Tests::NCS::TCickBenchHelper { - private: - using TBase = Tests::NCS::TCickBenchHelper; - public: - using TBase::TBase; - - TClickHelper(TKikimrRunner& runner) - : TBase(runner.GetTestServer()) - {} - - void CreateClickBenchTable(TString tableName = "benchTable", ui32 shardsCount = 4) { - TActorId sender = Server.GetRuntime()->AllocateEdgeActor(); - - TBase::CreateTestOlapTable(sender, "", Sprintf(R"( - Name: "%s" - ColumnShardCount: %d - Schema { - %s - } - Sharding { - HashSharding { - Function: HASH_FUNCTION_CONSISTENCY_64 - Columns: "EventTime" - } - })", tableName.c_str(), shardsCount, PROTO_SCHEMA)); - } - }; - - class TTableWithNullsHelper : public Tests::NCS::TTableWithNullsHelper { - private: - using TBase = Tests::NCS::TTableWithNullsHelper; - public: - using TBase::TBase; - - TTableWithNullsHelper(TKikimrRunner& runner) - : TBase(runner.GetTestServer()) - {} - - void CreateTableWithNulls(TString tableName = "tableWithNulls", ui32 shardsCount = 4) { - TActorId sender = Server.GetRuntime()->AllocateEdgeActor(); - - TBase::CreateTestOlapTable(sender, "", Sprintf(R"( - Name: "%s" - ColumnShardCount: %d - Schema { - %s - } - Sharding { - HashSharding { - Function: 
HASH_FUNCTION_CONSISTENCY_64 - Columns: "id" - } - })", tableName.c_str(), shardsCount, PROTO_SCHEMA)); - } - }; - - void WriteTestData(TKikimrRunner& kikimr, TString testTable, ui64 pathIdBegin, ui64 tsBegin, size_t rowCount, bool withSomeNulls = false) { - UNIT_ASSERT(testTable != "/Root/benchTable"); // TODO: check schema instead - TLocalHelper lHelper(kikimr); - if (withSomeNulls) { - lHelper.WithSomeNulls(); - } - auto batch = lHelper.TestArrowBatch(pathIdBegin, tsBegin, rowCount); - lHelper.SendDataViaActorSystem(testTable, batch); - } - - void WriteTestDataForClickBench(TKikimrRunner& kikimr, TString testTable, ui64 pathIdBegin, ui64 tsBegin, size_t rowCount) { - UNIT_ASSERT(testTable == "/Root/benchTable"); // TODO: check schema instead - TClickHelper lHelper(kikimr.GetTestServer()); - auto batch = lHelper.TestArrowBatch(pathIdBegin, tsBegin, rowCount); - lHelper.SendDataViaActorSystem(testTable, batch); - } - - void WriteTestDataForTableWithNulls(TKikimrRunner& kikimr, TString testTable) { - UNIT_ASSERT(testTable == "/Root/tableWithNulls"); // TODO: check schema instead - TTableWithNullsHelper lHelper(kikimr.GetTestServer()); - auto batch = lHelper.TestArrowBatch(); - lHelper.SendDataViaActorSystem(testTable, batch); - } - void CreateTableOfAllTypes(TKikimrRunner& kikimr) { auto& legacyClient = kikimr.GetTestClient(); @@ -689,59 +218,6 @@ Y_UNIT_TEST_SUITE(KqpOlap) { }; } - template - auto StreamExplainQuery(const TString& query, TClient& client) { - if constexpr (std::is_same_v) { - TStreamExecScanQuerySettings scanSettings; - scanSettings.Explain(true); - return client.StreamExecuteScanQuery(query, scanSettings).GetValueSync(); - } else { - NYdb::NQuery::TExecuteQuerySettings scanSettings; - scanSettings.ExecMode(NYdb::NQuery::EExecMode::Explain); - return client.StreamExecuteQuery(query, NYdb::NQuery::TTxControl::BeginTx().CommitTx(), scanSettings).GetValueSync(); - } - } - - template - void CheckPlanForAggregatePushdown( - const TString& query, - 
TClient& client, - const std::vector& expectedPlanNodes, - const std::string& readNodeType) - { - auto res = StreamExplainQuery(query, client); - UNIT_ASSERT_C(res.IsSuccess(), res.GetIssues().ToString()); - - auto planRes = CollectStreamResult(res); - auto ast = planRes.QueryStats->Getquery_ast(); - Cerr << "JSON Plan:" << Endl; - Cerr << planRes.PlanJson.GetOrElse("NO_PLAN") << Endl; - Cerr << "AST:" << Endl; - Cerr << ast << Endl; - for (auto planNode : expectedPlanNodes) { - UNIT_ASSERT_C(ast.find(planNode) != std::string::npos, - TStringBuilder() << planNode << " was not found. Query: " << query); - } - UNIT_ASSERT_C(ast.find("SqueezeToDict") == std::string::npos, TStringBuilder() << "SqueezeToDict denied for aggregation requests. Query: " << query); - - if (!readNodeType.empty()) { - NJson::TJsonValue planJson; - NJson::ReadJsonTree(*planRes.PlanJson, &planJson, true); - auto readNode = FindPlanNodeByKv(planJson, "Node Type", readNodeType.c_str()); - UNIT_ASSERT(readNode.IsDefined()); - - auto& operators = readNode.GetMapSafe().at("Operators").GetArraySafe(); - for (auto& op : operators) { - if (op.GetMapSafe().at("Name") == "TableFullScan") { - auto ssaProgram = op.GetMapSafe().at("SsaProgram"); - UNIT_ASSERT(ssaProgram.IsDefined()); - UNIT_ASSERT(FindPlanNodes(ssaProgram, "Projection").size()); - break; - } - } - } - } - Y_UNIT_TEST(SimpleQueryOlap) { auto settings = TKikimrSettings() .SetWithSampleTables(false); @@ -1170,308 +646,48 @@ Y_UNIT_TEST_SUITE(KqpOlap) { CompareYson(StreamResultToYson(it), "[]"); } - Y_UNIT_TEST(Aggregation) { - auto settings = TKikimrSettings() - .SetWithSampleTables(false); - TKikimrRunner kikimr(settings); - - TLocalHelper(kikimr).CreateTestOlapTable(); + Y_UNIT_TEST(PushdownFilter) { + static bool enableLog = false; - auto tableClient = kikimr.GetTableClient(); + auto doTest = [](std::optional viaPragma, bool pushdownPresent) { + auto settings = TKikimrSettings() + .SetWithSampleTables(false); - { - auto it = 
tableClient.StreamExecuteScanQuery(R"( - --!syntax_v1 + if (enableLog) { + Cerr << "Run test:" << Endl; + Cerr << "viaPragma is " << (viaPragma.has_value() ? "" : "not ") << "present."; + if (viaPragma.has_value()) { + Cerr << " Value: " << viaPragma.value(); + } + Cerr << Endl; + Cerr << "Expected result: " << pushdownPresent << Endl; + } - SELECT - COUNT(*) - FROM `/Root/olapStore/olapTable` - )").GetValueSync(); + TKikimrRunner kikimr(settings); + kikimr.GetTestServer().GetRuntime()->SetLogPriority(NKikimrServices::TX_COLUMNSHARD, NActors::NLog::PRI_DEBUG); - UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); - TString result = StreamResultToYson(it); - Cout << result << Endl; - CompareYson(result, R"([[0u;]])"); - } + auto client = kikimr.GetTableClient(); - { - WriteTestData(kikimr, "/Root/olapStore/olapTable", 10000, 3000000, 1000); - WriteTestData(kikimr, "/Root/olapStore/olapTable", 11000, 3001000, 1000); - WriteTestData(kikimr, "/Root/olapStore/olapTable", 12000, 3002000, 1000); - WriteTestData(kikimr, "/Root/olapStore/olapTable", 13000, 3003000, 1000); - WriteTestData(kikimr, "/Root/olapStore/olapTable", 14000, 3004000, 1000); - WriteTestData(kikimr, "/Root/olapStore/olapTable", 20000, 2000000, 7000); - WriteTestData(kikimr, "/Root/olapStore/olapTable", 30000, 1000000, 11000); - } + TLocalHelper(kikimr).CreateTestOlapTable(); + WriteTestData(kikimr, "/Root/olapStore/olapTable", 0, 1000000, 10); - { - auto it = tableClient.StreamExecuteScanQuery(R"( - --!syntax_v1 + TStreamExecScanQuerySettings scanSettings; + scanSettings.Explain(true); - SELECT - COUNT(*), MAX(`resource_id`), MAX(`timestamp`), MIN(LENGTH(`message`)) - FROM `/Root/olapStore/olapTable` - )").GetValueSync(); + { + TString query = TString(R"( + --!syntax_v1 + SELECT * FROM `/Root/olapStore/olapTable` WHERE resource_id = "5"u; + )"); - UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); - TString result = StreamResultToYson(it); - Cout << result << Endl; - CompareYson(result, 
R"([[23000u;["40999"];[3004999u];[1036u]]])"); - } + if (viaPragma.has_value() && !viaPragma.value()) { + TString pragma = TString(R"( + PRAGMA Kikimr.OptEnableOlapPushdown = "false"; + )"); + query = pragma + query; + } - { - auto it = tableClient.StreamExecuteScanQuery(R"( - --!syntax_v1 - - SELECT - COUNT(*) - FROM `/Root/olapStore/olapTable` - )").GetValueSync(); - - UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); - TString result = StreamResultToYson(it); - Cout << result << Endl; - CompareYson(result, R"([[23000u;]])"); - } - } - - Y_UNIT_TEST(Indexes) { - auto settings = TKikimrSettings() - .SetWithSampleTables(false); - TKikimrRunner kikimr(settings); - - TLocalHelper(kikimr).CreateTestOlapTable(); - auto tableClient = kikimr.GetTableClient(); - -// Tests::NCommon::TLoggerInit(kikimr).Initialize(); - - auto csController = NYDBTest::TControllers::RegisterCSControllerGuard(); - - { - auto alterQuery = TStringBuilder() << - R"(ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_INDEX, NAME=index_uid, TYPE=BLOOM_FILTER, - FEATURES=`{"column_names" : ["uid"], "false_positive_probability" : 0.05}`); - )"; - auto session = tableClient.CreateSession().GetValueSync().GetSession(); - auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); - UNIT_ASSERT_VALUES_EQUAL_C(alterResult.GetStatus(), EStatus::SUCCESS, alterResult.GetIssues().ToString()); - } - { - auto alterQuery = TStringBuilder() << - R"(ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_INDEX, NAME=index_resource_id, TYPE=BLOOM_FILTER, - FEATURES=`{"column_names" : ["resource_id", "level"], "false_positive_probability" : 0.05}`); - )"; - auto session = tableClient.CreateSession().GetValueSync().GetSession(); - auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); - UNIT_ASSERT_VALUES_EQUAL_C(alterResult.GetStatus(), EStatus::SUCCESS, alterResult.GetIssues().ToString()); - } - - std::vector uids; - std::vector resourceIds; - 
std::vector levels; - - { - WriteTestData(kikimr, "/Root/olapStore/olapTable", 1000000, 300000000, 10000); - WriteTestData(kikimr, "/Root/olapStore/olapTable", 1100000, 300100000, 10000); - WriteTestData(kikimr, "/Root/olapStore/olapTable", 1200000, 300200000, 10000); - WriteTestData(kikimr, "/Root/olapStore/olapTable", 1300000, 300300000, 10000); - WriteTestData(kikimr, "/Root/olapStore/olapTable", 1400000, 300400000, 10000); - WriteTestData(kikimr, "/Root/olapStore/olapTable", 2000000, 200000000, 70000); - WriteTestData(kikimr, "/Root/olapStore/olapTable", 3000000, 100000000, 110000); - - const auto filler = [&](const ui32 startRes, const ui32 startUid, const ui32 count) { - for (ui32 i = 0; i < count; ++i) { - uids.emplace_back("uid_" + ::ToString(startUid + i)); - resourceIds.emplace_back(::ToString(startRes + i)); - levels.emplace_back(i % 5); - } - }; - - filler(1000000, 300000000, 10000); - filler(1100000, 300100000, 10000); - filler(1200000, 300200000, 10000); - filler(1300000, 300300000, 10000); - filler(1400000, 300400000, 10000); - filler(2000000, 200000000, 70000); - filler(3000000, 100000000, 110000); - - } - - { - auto it = tableClient.StreamExecuteScanQuery(R"( - --!syntax_v1 - - SELECT - COUNT(*) - FROM `/Root/olapStore/olapTable` - )").GetValueSync(); - - UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); - TString result = StreamResultToYson(it); - Cout << result << Endl; - CompareYson(result, R"([[230000u;]])"); - } - - AFL_VERIFY(csController->GetIndexesSkippingOnSelect().Val() == 0); - AFL_VERIFY(csController->GetIndexesApprovedOnSelect().Val() == 0); - TInstant start = Now(); - ui32 compactionsStart = csController->GetCompactions().Val(); - while (Now() - start < TDuration::Seconds(10)) { - if (compactionsStart != csController->GetCompactions().Val()) { - compactionsStart = csController->GetCompactions().Val(); - start = Now(); - } - Cerr << "WAIT_COMPACTION: " << csController->GetCompactions().Val() << Endl; - 
Sleep(TDuration::Seconds(1)); - } - - { - auto it = tableClient.StreamExecuteScanQuery(R"( - --!syntax_v1 - - SELECT - COUNT(*) - FROM `/Root/olapStore/olapTable` - WHERE ((resource_id = '2' AND level = 222222) OR (resource_id = '1' AND level = 111111) OR (resource_id LIKE '%11dd%')) AND uid = '222' - )").GetValueSync(); - - UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); - TString result = StreamResultToYson(it); - Cout << result << Endl; - Cout << csController->GetIndexesSkippingOnSelect().Val() << " / " << csController->GetIndexesApprovedOnSelect().Val() << Endl; - CompareYson(result, R"([[0u;]])"); - AFL_VERIFY(csController->GetIndexesSkippedNoData().Val() == 0); - AFL_VERIFY(csController->GetIndexesApprovedOnSelect().Val() < csController->GetIndexesSkippingOnSelect().Val() * 0.3); - } - ui32 requestsCount = 100; - for (ui32 i = 0; i < requestsCount; ++i) { - const ui32 idx = RandomNumber(uids.size()); - const auto query = [](const TString& res, const TString& uid, const ui32 level) { - TStringBuilder sb; - sb << "SELECT" << Endl; - sb << "COUNT(*)" << Endl; - sb << "FROM `/Root/olapStore/olapTable`" << Endl; - sb << "WHERE(" << Endl; - sb << "resource_id = '" << res << "' AND" << Endl; - sb << "uid= '" << uid << "' AND" << Endl; - sb << "level= " << level << Endl; - sb << ")"; - return sb; - }; - auto it = tableClient.StreamExecuteScanQuery(query(resourceIds[idx], uids[idx], levels[idx])).GetValueSync(); - - UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); - TString result = StreamResultToYson(it); - Cout << csController->GetIndexesSkippingOnSelect().Val() << " / " << csController->GetIndexesApprovedOnSelect().Val() << " / " << csController->GetIndexesSkippedNoData().Val() << Endl; - CompareYson(result, R"([[1u;]])"); - } - - AFL_VERIFY(csController->GetIndexesApprovedOnSelect().Val() < 0.20 * csController->GetIndexesSkippingOnSelect().Val()); - - } - - Y_UNIT_TEST(IndexesModificationError) { - auto settings = 
TKikimrSettings().SetWithSampleTables(false); - TKikimrRunner kikimr(settings); - - TLocalHelper(kikimr).CreateTestOlapTable(); - auto tableClient = kikimr.GetTableClient(); - - // Tests::NCommon::TLoggerInit(kikimr).Initialize(); - - auto csController = NYDBTest::TControllers::RegisterCSControllerGuard(); - - { - auto alterQuery = TStringBuilder() << - R"(ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_INDEX, NAME=index_uid, TYPE=BLOOM_FILTER, - FEATURES=`{"column_names" : ["uid"], "false_positive_probability" : 0.05}`); - )"; - auto session = tableClient.CreateSession().GetValueSync().GetSession(); - auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); - UNIT_ASSERT_VALUES_EQUAL_C(alterResult.GetStatus(), EStatus::SUCCESS, alterResult.GetIssues().ToString()); - } - - { - auto alterQuery = TStringBuilder() << - R"(ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_INDEX, NAME=index_uid, TYPE=BLOOM_FILTER, - FEATURES=`{"column_names" : ["uid", "resource_id"], "false_positive_probability" : 0.05}`); - )"; - auto session = tableClient.CreateSession().GetValueSync().GetSession(); - auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); - UNIT_ASSERT_VALUES_UNEQUAL(alterResult.GetStatus(), EStatus::SUCCESS); - } - - { - auto alterQuery = TStringBuilder() << - R"(ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_INDEX, NAME=index_uid, TYPE=BLOOM_FILTER, - FEATURES=`{"column_names" : ["uid"], "false_positive_probability" : 0.005}`); - )"; - auto session = tableClient.CreateSession().GetValueSync().GetSession(); - auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); - UNIT_ASSERT_VALUES_UNEQUAL(alterResult.GetStatus(), EStatus::SUCCESS); - } - - { - auto alterQuery = TStringBuilder() << - R"(ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_INDEX, NAME=index_uid, TYPE=BLOOM_FILTER, - FEATURES=`{"column_names" : ["uid"], 
"false_positive_probability" : 0.01}`); - )"; - auto session = tableClient.CreateSession().GetValueSync().GetSession(); - auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); - UNIT_ASSERT_VALUES_EQUAL_C(alterResult.GetStatus(), EStatus::SUCCESS, alterResult.GetIssues().ToString()); - } - - { - auto alterQuery = TStringBuilder() << "ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=DROP_INDEX, NAME=index_uid);"; - auto session = tableClient.CreateSession().GetValueSync().GetSession(); - auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); - UNIT_ASSERT_VALUES_EQUAL_C(alterResult.GetStatus(), EStatus::SUCCESS, alterResult.GetIssues().ToString()); - } - - } - - Y_UNIT_TEST(PushdownFilter) { - static bool enableLog = false; - - auto doTest = [](std::optional viaPragma, bool pushdownPresent) { - auto settings = TKikimrSettings() - .SetWithSampleTables(false); - - if (enableLog) { - Cerr << "Run test:" << Endl; - Cerr << "viaPragma is " << (viaPragma.has_value() ? 
"" : "not ") << "present."; - if (viaPragma.has_value()) { - Cerr << " Value: " << viaPragma.value(); - } - Cerr << Endl; - Cerr << "Expected result: " << pushdownPresent << Endl; - } - - TKikimrRunner kikimr(settings); - kikimr.GetTestServer().GetRuntime()->SetLogPriority(NKikimrServices::TX_COLUMNSHARD, NActors::NLog::PRI_DEBUG); - - auto client = kikimr.GetTableClient(); - - TLocalHelper(kikimr).CreateTestOlapTable(); - WriteTestData(kikimr, "/Root/olapStore/olapTable", 0, 1000000, 10); - - TStreamExecScanQuerySettings scanSettings; - scanSettings.Explain(true); - - { - TString query = TString(R"( - --!syntax_v1 - SELECT * FROM `/Root/olapStore/olapTable` WHERE resource_id = "5"u; - )"); - - if (viaPragma.has_value() && !viaPragma.value()) { - TString pragma = TString(R"( - PRAGMA Kikimr.OptEnableOlapPushdown = "false"; - )"); - query = pragma + query; - } - - auto it = client.StreamExecuteScanQuery(query).GetValueSync(); + auto it = client.StreamExecuteScanQuery(query).GetValueSync(); UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); TString result = StreamResultToYson(it); @@ -1481,7 +697,7 @@ Y_UNIT_TEST_SUITE(KqpOlap) { ["some prefix 
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"]; ["5"]; 1000005u; - ["uid_1000005"] + "uid_1000005" ]])"); } }; @@ -1586,7 +802,7 @@ Y_UNIT_TEST_SUITE(KqpOlap) { ui32 i = 0; const ui32 rowsPack = 20; const TInstant start = Now(); - while (!csController->HasCompactions() && Now() - start < TDuration::Seconds(100)) { + while (!csController->GetCompactionFinishedCounter().Val() && Now() - start < TDuration::Seconds(100)) { WriteTestData(kikimr, "/Root/olapStore/olapTable", 0, 1000000 + i * rowsPack, rowsPack); ++i; rowsCount += rowsPack; @@ -2064,1757 +1280,48 @@ Y_UNIT_TEST_SUITE(KqpOlap) { auto tableClient = kikimr.GetTableClient(); auto query = R"( - PRAGMA DisableAnsiLike; - SELECT id, resource_id FROM `/Root/tableWithNulls` WHERE resource_id LIKE "%5%" - )"; - auto it = tableClient.StreamExecuteScanQuery(query, scanSettings).GetValueSync(); - UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); - - auto result = CollectStreamResult(it); - auto ast = result.QueryStats->Getquery_ast(); - 
UNIT_ASSERT_C(ast.find("KqpOlapFilter") == std::string::npos, - TStringBuilder() << "Predicate pushed down. Query: " << query); - } - - Y_UNIT_TEST(PredicatePushdown_MixStrictAndNotStrict) { - auto settings = TKikimrSettings() - .SetWithSampleTables(false); - TKikimrRunner kikimr(settings); - - TStreamExecScanQuerySettings scanSettings; - scanSettings.Explain(true); - - TLocalHelper(kikimr).CreateTestOlapTable(); - WriteTestData(kikimr, "/Root/olapStore/olapTable", 10000, 3000000, 5); - Tests::NCommon::TLoggerInit(kikimr).Initialize(); - - auto tableClient = kikimr.GetTableClient(); - auto query = R"( - PRAGMA Kikimr.OptEnablePredicateExtract = "false"; - SELECT `timestamp` FROM `/Root/olapStore/olapTable` WHERE - `resource_id` = "10001" AND Unwrap(`level`/1) = `level` AND `level` > 1; - )"; - - auto it = tableClient.StreamExecuteScanQuery(query, scanSettings).GetValueSync(); - UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); - - auto result = CollectStreamResult(it); - auto ast = result.QueryStats->Getquery_ast(); - UNIT_ASSERT_C(ast.find(R"(('eq '"resource_id")") != std::string::npos, - TStringBuilder() << "Predicate not pushed down. Query: " << query); - UNIT_ASSERT_C(ast.find(R"(('gt '"level")") == std::string::npos, - TStringBuilder() << "Predicate pushed down. Query: " << query); - UNIT_ASSERT_C(ast.find("NarrowMap") != std::string::npos, - TStringBuilder() << "NarrowMap was removed. 
Query: " << query); - } - - Y_UNIT_TEST(AggregationCountPushdown) { - auto settings = TKikimrSettings() - .SetWithSampleTables(false) - .SetForceColumnTablesCompositeMarks(true); - TKikimrRunner kikimr(settings); - - TLocalHelper(kikimr).CreateTestOlapTable(); - auto tableClient = kikimr.GetTableClient(); - - { - WriteTestData(kikimr, "/Root/olapStore/olapTable", 10000, 3000000, 1000); - WriteTestData(kikimr, "/Root/olapStore/olapTable", 11000, 3001000, 1000); - WriteTestData(kikimr, "/Root/olapStore/olapTable", 12000, 3002000, 1000); - WriteTestData(kikimr, "/Root/olapStore/olapTable", 13000, 3003000, 1000); - WriteTestData(kikimr, "/Root/olapStore/olapTable", 14000, 3004000, 1000); - WriteTestData(kikimr, "/Root/olapStore/olapTable", 20000, 2000000, 7000); - WriteTestData(kikimr, "/Root/olapStore/olapTable", 30000, 1000000, 11000); - } - - { - TString query = R"( - --!syntax_v1 - SELECT - COUNT(level) - FROM `/Root/olapStore/olapTable` - )"; - auto opStartTime = Now(); - auto it = tableClient.StreamExecuteScanQuery(query).GetValueSync(); - - UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); - TString result = StreamResultToYson(it); - Cerr << "!!!\nPushdown query execution time: " << (Now() - opStartTime).MilliSeconds() << "\n!!!\n"; - Cout << result << Endl; - CompareYson(result, R"([[23000u;]])"); - - // Check plan -#if SSA_RUNTIME_VERSION >= 2U - CheckPlanForAggregatePushdown(query, tableClient, { "TKqpOlapAgg" }, "TableFullScan"); -#else - CheckPlanForAggregatePushdown(query, tableClient, { "CombineCore" }, ""); -#endif - } - } - - Y_UNIT_TEST(AggregationCountGroupByPushdown) { - auto settings = TKikimrSettings() - .SetWithSampleTables(false) - .SetForceColumnTablesCompositeMarks(true); - TKikimrRunner kikimr(settings); - - TLocalHelper(kikimr).CreateTestOlapTable(); - auto tableClient = kikimr.GetTableClient(); - - { - WriteTestData(kikimr, "/Root/olapStore/olapTable", 10000, 3000000, 1000); - WriteTestData(kikimr, "/Root/olapStore/olapTable", 11000, 
3001000, 1000); - WriteTestData(kikimr, "/Root/olapStore/olapTable", 12000, 3002000, 1000); - WriteTestData(kikimr, "/Root/olapStore/olapTable", 13000, 3003000, 1000); - WriteTestData(kikimr, "/Root/olapStore/olapTable", 14000, 3004000, 1000); - WriteTestData(kikimr, "/Root/olapStore/olapTable", 20000, 2000000, 7000); - WriteTestData(kikimr, "/Root/olapStore/olapTable", 30000, 1000000, 11000); - } - - { - TString query = R"( - --!syntax_v1 - PRAGMA Kikimr.OptUseFinalizeByKey; - SELECT - level, COUNT(level) - FROM `/Root/olapStore/olapTable` - GROUP BY level - ORDER BY level - )"; - auto it = tableClient.StreamExecuteScanQuery(query).GetValueSync(); - - UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); - TString result = StreamResultToYson(it); - Cout << result << Endl; - CompareYson(result, R"([[[0];4600u];[[1];4600u];[[2];4600u];[[3];4600u];[[4];4600u]])"); - - // Check plan -#if SSA_RUNTIME_VERSION >= 2U - CheckPlanForAggregatePushdown(query, tableClient, { "WideCombiner" }, "Aggregate-TableFullScan"); -// CheckPlanForAggregatePushdown(query, tableClient, { "TKqpOlapAgg" }, "TableFullScan"); -#else - CheckPlanForAggregatePushdown(query, tableClient, { "CombineCore" }, ""); -#endif - } - } - - Y_UNIT_TEST_TWIN(CountAllPushdown, UseLlvm) { - auto settings = TKikimrSettings() - .SetWithSampleTables(false) - .SetForceColumnTablesCompositeMarks(true); - TKikimrRunner kikimr(settings); - - TLocalHelper(kikimr).CreateTestOlapTable(); - auto tableClient = kikimr.GetTableClient(); - - { - WriteTestData(kikimr, "/Root/olapStore/olapTable", 10000, 3000000, 1000); - WriteTestData(kikimr, "/Root/olapStore/olapTable", 11000, 3001000, 1000); - WriteTestData(kikimr, "/Root/olapStore/olapTable", 12000, 3002000, 1000); - WriteTestData(kikimr, "/Root/olapStore/olapTable", 13000, 3003000, 1000); - WriteTestData(kikimr, "/Root/olapStore/olapTable", 14000, 3004000, 1000); - WriteTestData(kikimr, "/Root/olapStore/olapTable", 20000, 2000000, 7000); - WriteTestData(kikimr, 
"/Root/olapStore/olapTable", 30000, 1000000, 11000); - } - - { - TString query = fmt::format(R"( - --!syntax_v1 - PRAGMA ydb.UseLlvm = "{}"; - - SELECT - COUNT(*) - FROM `/Root/olapStore/olapTable` - )", UseLlvm ? "true" : "false"); - auto it = tableClient.StreamExecuteScanQuery(query).GetValueSync(); - - UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); - TString result = StreamResultToYson(it); - Cout << result << Endl; - CompareYson(result, R"([[23000u;]])"); - - // Check plan -#if SSA_RUNTIME_VERSION >= 2U - CheckPlanForAggregatePushdown(query, tableClient, { "TKqpOlapAgg" }, "TableFullScan"); -#else - CheckPlanForAggregatePushdown(query, tableClient, { "Condense" }, ""); -#endif - } - } - - Y_UNIT_TEST_TWIN(CountAllPushdownBackwardCompatibility, EnableLlvm) { - auto settings = TKikimrSettings() - .SetWithSampleTables(false) - .SetForceColumnTablesCompositeMarks(true); - TKikimrRunner kikimr(settings); - - TLocalHelper(kikimr).CreateTestOlapTable(); - auto tableClient = kikimr.GetTableClient(); - - { - WriteTestData(kikimr, "/Root/olapStore/olapTable", 10000, 3000000, 1000); - WriteTestData(kikimr, "/Root/olapStore/olapTable", 11000, 3001000, 1000); - WriteTestData(kikimr, "/Root/olapStore/olapTable", 12000, 3002000, 1000); - WriteTestData(kikimr, "/Root/olapStore/olapTable", 13000, 3003000, 1000); - WriteTestData(kikimr, "/Root/olapStore/olapTable", 14000, 3004000, 1000); - WriteTestData(kikimr, "/Root/olapStore/olapTable", 20000, 2000000, 7000); - WriteTestData(kikimr, "/Root/olapStore/olapTable", 30000, 1000000, 11000); - } - - { - TString query = fmt::format(R"( - --!syntax_v1 - PRAGMA Kikimr.EnableLlvm = "{}"; - - SELECT - COUNT(*) - FROM `/Root/olapStore/olapTable` - )", EnableLlvm ? 
"true" : "false"); - auto it = tableClient.StreamExecuteScanQuery(query).GetValueSync(); - - UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); - TString result = StreamResultToYson(it); - Cout << result << Endl; - CompareYson(result, R"([[23000u;]])"); - - // Check plan -#if SSA_RUNTIME_VERSION >= 2U - CheckPlanForAggregatePushdown(query, tableClient, { "TKqpOlapAgg" }, "TableFullScan"); -#else - CheckPlanForAggregatePushdown(query, tableClient, { "Condense" }, ""); -#endif - } - } - - Y_UNIT_TEST(CountAllNoPushdown) { - auto settings = TKikimrSettings() - .SetWithSampleTables(false) - .SetForceColumnTablesCompositeMarks(true); - TKikimrRunner kikimr(settings); - - TLocalHelper(kikimr).CreateTestOlapTable(); - auto tableClient = kikimr.GetTableClient(); - - { - WriteTestData(kikimr, "/Root/olapStore/olapTable", 10000, 3000000, 1000); - WriteTestData(kikimr, "/Root/olapStore/olapTable", 11000, 3001000, 1000); - WriteTestData(kikimr, "/Root/olapStore/olapTable", 12000, 3002000, 1000); - WriteTestData(kikimr, "/Root/olapStore/olapTable", 13000, 3003000, 1000); - WriteTestData(kikimr, "/Root/olapStore/olapTable", 14000, 3004000, 1000); - WriteTestData(kikimr, "/Root/olapStore/olapTable", 20000, 2000000, 7000); - WriteTestData(kikimr, "/Root/olapStore/olapTable", 30000, 1000000, 11000); - } - - { - auto it = tableClient.StreamExecuteScanQuery(R"( - --!syntax_v1 - SELECT - COUNT(*) - FROM `/Root/olapStore/olapTable` - )").GetValueSync(); - - UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); - TString result = StreamResultToYson(it); - Cout << result << Endl; - CompareYson(result, R"([[23000u;]])"); - } - } - - class TExpectedLimitChecker { - private: - std::optional ExpectedLimit; - std::optional ExpectedResultCount; - ui32 CheckScanData = 0; - ui32 CheckScanTask = 0; - public: - TExpectedLimitChecker& SetExpectedLimit(const ui32 value) { - ExpectedLimit = value; - ExpectedResultCount = value; - return *this; - } - TExpectedLimitChecker& 
SetExpectedResultCount(const ui32 value) { - ExpectedResultCount = value; - return *this; - } - bool CheckExpectedLimitOnScanData(const ui32 resultCount) { - if (!ExpectedResultCount) { - return true; - } - ++CheckScanData; - UNIT_ASSERT_LE(resultCount, *ExpectedResultCount); - return true; - } - bool CheckExpectedLimitOnScanTask(const ui32 taskLimit) { - if (!ExpectedLimit) { - return true; - } - ++CheckScanTask; - UNIT_ASSERT_EQUAL(taskLimit, *ExpectedLimit); - return true; - } - bool CheckFinish() const { - if (!ExpectedLimit) { - return true; - } - return CheckScanData && CheckScanTask; - } - }; - - class TExpectedRecordChecker { - private: - std::optional ExpectedColumnsCount; - ui32 CheckScanData = 0; - public: - TExpectedRecordChecker& SetExpectedColumnsCount(const ui32 value) { - ExpectedColumnsCount = value; - return *this; - } - bool CheckExpectedOnScanData(const ui32 columnsCount) { - if (!ExpectedColumnsCount) { - return true; - } - ++CheckScanData; - UNIT_ASSERT_EQUAL(columnsCount, *ExpectedColumnsCount); - return true; - } - bool CheckFinish() const { - if (!ExpectedColumnsCount) { - return true; - } - return CheckScanData; - } - }; - - class TAggregationTestCase { - private: - TString Query; - TString ExpectedReply; - std::vector ExpectedPlanOptions; - bool Pushdown = true; - std::string ExpectedReadNodeType; - TExpectedLimitChecker LimitChecker; - TExpectedRecordChecker RecordChecker; - bool UseLlvm = true; - public: - void FillExpectedAggregationGroupByPlanOptions() { -#if SSA_RUNTIME_VERSION >= 2U -// AddExpectedPlanOptions("TKqpOlapAgg"); - AddExpectedPlanOptions("WideCombiner"); -#else - AddExpectedPlanOptions("CombineCore"); -#endif - } - TString GetFixedQuery() const { - TStringBuilder queryFixed; - queryFixed << "--!syntax_v1" << Endl; - if (!Pushdown) { - queryFixed << "PRAGMA Kikimr.OptEnableOlapPushdown = \"false\";" << Endl; - } - if (!UseLlvm) { - queryFixed << "PRAGMA Kikimr.UseLlvm = \"false\";" << Endl; - } - queryFixed << "PRAGMA 
Kikimr.OptUseFinalizeByKey;" << Endl; - - queryFixed << Query << Endl; - Cerr << "REQUEST:\n" << queryFixed << Endl; - return queryFixed; - } - TAggregationTestCase() = default; - TExpectedLimitChecker& MutableLimitChecker() { - return LimitChecker; - } - TExpectedRecordChecker& MutableRecordChecker() { - return RecordChecker; - } - bool GetPushdown() const { - return Pushdown; - } - TAggregationTestCase& SetPushdown(const bool value = true) { - Pushdown = value; - return *this; - } - bool CheckFinished() const { - return LimitChecker.CheckFinish(); - } - - const TString& GetQuery() const { - return Query; - } - TAggregationTestCase& SetQuery(const TString& value) { - Query = value; - return *this; - } - TAggregationTestCase& SetUseLlvm(const bool value) { - UseLlvm = value; - return *this; - } - const TString& GetExpectedReply() const { - return ExpectedReply; - } - TAggregationTestCase& SetExpectedReply(const TString& value) { - ExpectedReply = value; - return *this; - } - - TAggregationTestCase& AddExpectedPlanOptions(const std::string& value) { - ExpectedPlanOptions.emplace_back(value); - return *this; - } - - const std::vector& GetExpectedPlanOptions() const { - return ExpectedPlanOptions; - } - - TAggregationTestCase& SetExpectedReadNodeType(const std::string& value) { - ExpectedReadNodeType = value; - return *this; - } - - const std::string& GetExpectedReadNodeType() const { - return ExpectedReadNodeType; - } - }; - - void TestAggregationsBase(const std::vector& cases) { - auto settings = TKikimrSettings() - .SetWithSampleTables(false) - .SetForceColumnTablesCompositeMarks(true); - TKikimrRunner kikimr(settings); - - TLocalHelper(kikimr).CreateTestOlapTable(); - auto tableClient = kikimr.GetTableClient(); - - { - WriteTestData(kikimr, "/Root/olapStore/olapTable", 10000, 3000000, 1000); - WriteTestData(kikimr, "/Root/olapStore/olapTable", 11000, 3001000, 1000); - WriteTestData(kikimr, "/Root/olapStore/olapTable", 12000, 3002000, 1000); - WriteTestData(kikimr, 
"/Root/olapStore/olapTable", 13000, 3003000, 1000); - WriteTestData(kikimr, "/Root/olapStore/olapTable", 14000, 3004000, 1000); - WriteTestData(kikimr, "/Root/olapStore/olapTable", 20000, 2000000, 7000); - WriteTestData(kikimr, "/Root/olapStore/olapTable", 30000, 1000000, 11000); - } - - for (auto&& i : cases) { - const TString queryFixed = i.GetFixedQuery(); - { - auto it = tableClient.StreamExecuteScanQuery(queryFixed).GetValueSync(); - UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); - TString result = StreamResultToYson(it); - if (!i.GetExpectedReply().empty()) { - CompareYson(result, i.GetExpectedReply()); - } - } - CheckPlanForAggregatePushdown(queryFixed, tableClient, i.GetExpectedPlanOptions(), i.GetExpectedReadNodeType()); - } - } - - void TestAggregationsInternal(const std::vector& cases) { - TPortManager tp; - ui16 mbusport = tp.GetPort(2134); - auto settings = Tests::TServerSettings(mbusport) - .SetDomainName("Root") - .SetUseRealThreads(false) - .SetNodeCount(2); - - Tests::TServer::TPtr server = new Tests::TServer(settings); - - auto runtime = server->GetRuntime(); - auto sender = runtime->AllocateEdgeActor(); - - InitRoot(server, sender); - Tests::NCommon::TLoggerInit(runtime).Initialize(); - - ui32 numShards = 1; - ui32 numIterations = 10; - TLocalHelper(*server).CreateTestOlapTable("olapTable", "olapStore", numShards, numShards); - const ui32 iterationPackSize = 2000; - for (ui64 i = 0; i < numIterations; ++i) { - TLocalHelper(*server).SendDataViaActorSystem("/Root/olapStore/olapTable", 0, 1000000 + i * 1000000, iterationPackSize); - } - - TAggregationTestCase currentTest; - auto captureEvents = [&](TAutoPtr& ev) -> auto { - switch (ev->GetTypeRewrite()) { - case NKqp::TKqpComputeEvents::EvScanData: - { - auto* msg = ev->Get(); - Y_ABORT_UNLESS(currentTest.MutableLimitChecker().CheckExpectedLimitOnScanData(msg->ArrowBatch ? 
msg->ArrowBatch->num_rows() : 0)); - Y_ABORT_UNLESS(currentTest.MutableRecordChecker().CheckExpectedOnScanData(msg->ArrowBatch ? msg->ArrowBatch->num_columns() : 0)); - break; - } - case TEvDataShard::EvKqpScan: - { - auto* msg = ev->Get(); - Y_ABORT_UNLESS(currentTest.MutableLimitChecker().CheckExpectedLimitOnScanTask(msg->Record.GetItemsLimit())); - break; - } - } - return TTestActorRuntime::EEventAction::PROCESS; - }; - runtime->SetObserverFunc(captureEvents); - - for (auto&& i : cases) { - const TString queryFixed = i.GetFixedQuery(); - currentTest = i; - auto streamSender = runtime->AllocateEdgeActor(); - SendRequest(*runtime, streamSender, MakeStreamRequest(streamSender, queryFixed, false)); - auto ev = runtime->GrabEdgeEventRethrow(streamSender, TDuration::Seconds(10)); - Y_ABORT_UNLESS(currentTest.CheckFinished()); - } - } - - void TestAggregations(const std::vector& cases) { - TestAggregationsBase(cases); - TestAggregationsInternal(cases); - } - - template - auto StreamExecuteQuery(const TAggregationTestCase& testCase, TClient& client) { - if constexpr (std::is_same_v) { - return client.StreamExecuteScanQuery(testCase.GetFixedQuery()).GetValueSync(); - } else { - return client.StreamExecuteQuery( - testCase.GetFixedQuery(), - NYdb::NQuery::TTxControl::BeginTx().CommitTx()).GetValueSync(); - } - } - - template - void RunTestCaseWithClient(const TAggregationTestCase& testCase, TClient& client) { - auto it = StreamExecuteQuery(testCase, client); - UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); - TString result = StreamResultToYson(it); - if (!testCase.GetExpectedReply().empty()) { - CompareYson(result, testCase.GetExpectedReply()); - } - } - - void TestClickBenchBase(const std::vector& cases, const bool genericQuery) { - auto settings = TKikimrSettings() - .SetWithSampleTables(false) - .SetForceColumnTablesCompositeMarks(true); - TKikimrRunner kikimr(settings); - - TClickHelper(kikimr).CreateClickBenchTable(); - auto tableClient = 
kikimr.GetTableClient(); - - - ui32 numIterations = 10; - const ui32 iterationPackSize = NSan::PlainOrUnderSanitizer(2000, 20); - for (ui64 i = 0; i < numIterations; ++i) { - WriteTestDataForClickBench(kikimr, "/Root/benchTable", 0, 1000000 + i * 1000000, iterationPackSize); - } - - if (!genericQuery) { - auto tableClient = kikimr.GetTableClient(); - for (auto&& i : cases) { - const TString queryFixed = i.GetFixedQuery(); - RunTestCaseWithClient(i, tableClient); - CheckPlanForAggregatePushdown(queryFixed, tableClient, i.GetExpectedPlanOptions(), i.GetExpectedReadNodeType()); - } - } else { - auto queryClient = kikimr.GetQueryClient(); - for (auto&& i : cases) { - const TString queryFixed = i.GetFixedQuery(); - RunTestCaseWithClient(i, queryClient); - CheckPlanForAggregatePushdown(queryFixed, queryClient, i.GetExpectedPlanOptions(), i.GetExpectedReadNodeType()); - } - } - } - - void TestClickBenchInternal(const std::vector& cases) { - TPortManager tp; - ui16 mbusport = tp.GetPort(2134); - auto settings = Tests::TServerSettings(mbusport) - .SetDomainName("Root") - .SetUseRealThreads(false) - .SetNodeCount(2); - - Tests::TServer::TPtr server = new Tests::TServer(settings); - - auto runtime = server->GetRuntime(); - auto sender = runtime->AllocateEdgeActor(); - - InitRoot(server, sender); - - TClickHelper(*server).CreateClickBenchTable(); - - // write data - - ui32 numIterations = 10; - const ui32 iterationPackSize = NSan::PlainOrUnderSanitizer(2000, 20); - for (ui64 i = 0; i < numIterations; ++i) { - TClickHelper(*server).SendDataViaActorSystem("/Root/benchTable", 0, 1000000 + i * 1000000, - iterationPackSize); - } - - TAggregationTestCase currentTest; - auto captureEvents = [&](TAutoPtr& ev) -> auto { - switch (ev->GetTypeRewrite()) { - case NKqp::TKqpComputeEvents::EvScanData: - { - auto* msg = ev->Get(); - Y_ABORT_UNLESS(currentTest.MutableLimitChecker().CheckExpectedLimitOnScanData(msg->ArrowBatch ? 
msg->ArrowBatch->num_rows() : 0)); - Y_ABORT_UNLESS(currentTest.MutableRecordChecker().CheckExpectedOnScanData(msg->ArrowBatch ? msg->ArrowBatch->num_columns() : 0)); - break; - } - case TEvDataShard::EvKqpScan: - { - auto* msg = ev->Get(); - Y_ABORT_UNLESS(currentTest.MutableLimitChecker().CheckExpectedLimitOnScanTask(msg->Record.GetItemsLimit())); - break; - } - } - return TTestActorRuntime::EEventAction::PROCESS; - }; - runtime->SetObserverFunc(captureEvents); - - // selects - - for (auto&& i : cases) { - const TString queryFixed = i.GetFixedQuery(); - currentTest = i; - auto streamSender = runtime->AllocateEdgeActor(); - SendRequest(*runtime, streamSender, MakeStreamRequest(streamSender, queryFixed, false)); - auto ev = runtime->GrabEdgeEventRethrow(streamSender, TDuration::Seconds(10)); - Y_ABORT_UNLESS(currentTest.CheckFinished()); - } - } - - void TestClickBench(const std::vector& cases, const bool genericQuery = false) { - TestClickBenchBase(cases, genericQuery); - if (!genericQuery) { - TestClickBenchInternal(cases); - } - } - - void TestTableWithNulls(const std::vector& cases, const bool genericQuery = false) { - auto settings = TKikimrSettings() - .SetWithSampleTables(false) - .SetForceColumnTablesCompositeMarks(true); - TKikimrRunner kikimr(settings); - - Tests::NCommon::TLoggerInit(kikimr).Initialize(); - TTableWithNullsHelper(kikimr).CreateTableWithNulls(); - auto tableClient = kikimr.GetTableClient(); - - { - WriteTestDataForTableWithNulls(kikimr, "/Root/tableWithNulls"); - } - - if (!genericQuery) { - auto tableClient = kikimr.GetTableClient(); - for (auto&& i : cases) { - RunTestCaseWithClient(i, tableClient); - CheckPlanForAggregatePushdown(i.GetFixedQuery(), tableClient, - i.GetExpectedPlanOptions(), i.GetExpectedReadNodeType()); - } - } else { - auto queryClient = kikimr.GetQueryClient(); - for (auto&& i : cases) { - RunTestCaseWithClient(i, queryClient); - CheckPlanForAggregatePushdown(i.GetFixedQuery(), queryClient, - 
i.GetExpectedPlanOptions(), i.GetExpectedReadNodeType()); - } - } - } - - Y_UNIT_TEST(Filter_NotAllUsedFieldsInResultSet) { - TAggregationTestCase testCase; - testCase.SetQuery(R"( - SELECT id, resource_id FROM `/Root/tableWithNulls` - WHERE - level = 5; - )") - .SetExpectedReply("[[5;#]]") - .AddExpectedPlanOptions("KqpOlapFilter"); - - TestTableWithNulls({ testCase }); - } - - Y_UNIT_TEST(Aggregation_ResultDistinctCountRI_GroupByL) { - TAggregationTestCase testCase; - testCase.SetQuery(R"( - SELECT - level, COUNT(DISTINCT resource_id) - FROM `/Root/olapStore/olapTable` - GROUP BY level - ORDER BY level - )") - .SetExpectedReply("[[[0];4600u];[[1];4600u];[[2];4600u];[[3];4600u];[[4];4600u]]") - ; - testCase.FillExpectedAggregationGroupByPlanOptions(); - - TestAggregations({ testCase }); - } - - Y_UNIT_TEST(Aggregation_ResultCountAll_FilterL) { - TAggregationTestCase testCase; - testCase.SetQuery(R"( - SELECT - COUNT(*) - FROM `/Root/olapStore/olapTable` - WHERE level = 2 - )") - .SetExpectedReply("[[4600u;]]") - .AddExpectedPlanOptions("KqpOlapFilter") -#if SSA_RUNTIME_VERSION >= 2U - .AddExpectedPlanOptions("TKqpOlapAgg") - .MutableLimitChecker().SetExpectedResultCount(1) -#else - .AddExpectedPlanOptions("Condense") -#endif - ; - - TestAggregations({ testCase }); - } - - Y_UNIT_TEST(Aggregation_ResultCountL_FilterL) { - TAggregationTestCase testCase; - testCase.SetQuery(R"( - SELECT - COUNT(level) - FROM `/Root/olapStore/olapTable` - WHERE level = 2 - )") - .SetExpectedReply("[[4600u;]]") - .AddExpectedPlanOptions("KqpOlapFilter") -#if SSA_RUNTIME_VERSION >= 2U - .AddExpectedPlanOptions("TKqpOlapAgg") - .MutableLimitChecker().SetExpectedResultCount(1) -#else - .AddExpectedPlanOptions("CombineCore") -#endif - ; - - TestAggregations({ testCase }); - } - - Y_UNIT_TEST(Aggregation_ResultCountT_FilterL) { - TAggregationTestCase testCase; - testCase.SetQuery(R"( - SELECT - COUNT(timestamp) - FROM `/Root/olapStore/olapTable` - WHERE level = 2 - )") - 
.SetExpectedReply("[[4600u;]]") - .AddExpectedPlanOptions("KqpOlapFilter") -#if SSA_RUNTIME_VERSION >= 2U - .AddExpectedPlanOptions("TKqpOlapAgg") - .MutableLimitChecker().SetExpectedResultCount(1) -#else - .AddExpectedPlanOptions("CombineCore") - .AddExpectedPlanOptions("KqpOlapFilter") -#endif - ; - - TestAggregations({ testCase }); - } - - Y_UNIT_TEST(Aggregation_ResultTL_FilterL_Limit2) { - TAggregationTestCase testCase; - testCase.SetQuery(R"( - SELECT - timestamp, level - FROM `/Root/olapStore/olapTable` - WHERE level = 2 - LIMIT 2 - )") - .AddExpectedPlanOptions("KqpOlapFilter") - .MutableLimitChecker().SetExpectedLimit(2); - TestAggregations({ testCase }); - } - - Y_UNIT_TEST(Aggregation_ResultTL_FilterL_OrderT_Limit2) { - TAggregationTestCase testCase; - testCase.SetQuery(R"( - SELECT - timestamp, level - FROM `/Root/olapStore/olapTable` - WHERE level = 2 - ORDER BY timestamp - LIMIT 2 - )") - .AddExpectedPlanOptions("KqpOlapFilter") - .MutableLimitChecker().SetExpectedLimit(2); - - TestAggregations({ testCase }); - } - - Y_UNIT_TEST(Aggregation_ResultT_FilterL_Limit2) { - TAggregationTestCase testCase; - testCase.SetQuery(R"( - SELECT - timestamp - FROM `/Root/olapStore/olapTable` - WHERE level = 2 - LIMIT 2 - )") - .AddExpectedPlanOptions("KqpOlapFilter") - .AddExpectedPlanOptions("KqpOlapExtractMembers") - .MutableLimitChecker().SetExpectedLimit(2); - - TestAggregations({ testCase }); - } - - Y_UNIT_TEST(Aggregation_ResultT_FilterL_OrderT_Limit2) { - TAggregationTestCase testCase; - testCase.SetQuery(R"( - SELECT - timestamp - FROM `/Root/olapStore/olapTable` - WHERE level = 2 - ORDER BY timestamp - LIMIT 2 - )") - .AddExpectedPlanOptions("KqpOlapFilter") - .AddExpectedPlanOptions("KqpOlapExtractMembers") - .MutableLimitChecker().SetExpectedLimit(2); - - TestAggregations({ testCase }); - } - - Y_UNIT_TEST(Aggregation_ResultL_FilterL_OrderL_Limit2) { - TAggregationTestCase testCase; - testCase.SetQuery(R"( - SELECT - timestamp, level - FROM 
`/Root/olapStore/olapTable` - WHERE level > 1 - ORDER BY level - LIMIT 2 - )") - .AddExpectedPlanOptions("KqpOlapFilter"); - - TestAggregations({ testCase }); - } - - Y_UNIT_TEST(Aggregation_ResultCountExpr) { - auto g = NColumnShard::TLimits::MaxBlobSizeGuard(10000); - TAggregationTestCase testCase; - testCase.SetQuery(R"( - SELECT - COUNT(level + 2) - FROM `/Root/olapStore/olapTable` - )") - .SetExpectedReply("[[23000u;]]") - .AddExpectedPlanOptions("Condense1"); - - TestAggregations({ testCase }); - } - - Y_UNIT_TEST(Aggregation_Count_Null) { - TAggregationTestCase testCase; - testCase.SetQuery(R"( - SELECT - COUNT(level) - FROM `/Root/tableWithNulls` - WHERE id > 5; - )") - .SetExpectedReply("[[0u]]") -#if SSA_RUNTIME_VERSION >= 2U - .AddExpectedPlanOptions("TKqpOlapAgg"); -#else - .AddExpectedPlanOptions("CombineCore"); -#endif - - TestTableWithNulls({ testCase }); - } - - Y_UNIT_TEST(Aggregation_Count_NullMix) { - TAggregationTestCase testCase; - testCase.SetQuery(R"( - SELECT - COUNT(level) - FROM `/Root/tableWithNulls`; - )") - .SetExpectedReply("[[5u]]") -#if SSA_RUNTIME_VERSION >= 2U - .AddExpectedPlanOptions("TKqpOlapAgg"); -#else - .AddExpectedPlanOptions("CombineCore"); -#endif - - TestTableWithNulls({ testCase }); - } - - Y_UNIT_TEST(Aggregation_Count_GroupBy) { - TAggregationTestCase testCase; - testCase.SetQuery(R"( - SELECT - id, COUNT(level) - FROM `/Root/tableWithNulls` - WHERE id BETWEEN 4 AND 5 - GROUP BY id - ORDER BY id; - )") - .SetExpectedReply("[[4;1u];[5;1u]]"); - testCase.FillExpectedAggregationGroupByPlanOptions(); - - TestTableWithNulls({ testCase }); - } - - Y_UNIT_TEST(Aggregation_Count_NullGroupBy) { - TAggregationTestCase testCase; - testCase.SetQuery(R"( - SELECT - id, COUNT(level) - FROM `/Root/tableWithNulls` - WHERE id BETWEEN 6 AND 7 - GROUP BY id - ORDER BY id; - )") - .SetExpectedReply("[[6;0u];[7;0u]]"); - testCase.FillExpectedAggregationGroupByPlanOptions(); - - TestTableWithNulls({ testCase }); - } - - 
Y_UNIT_TEST(Aggregation_Count_NullMixGroupBy) { - TAggregationTestCase testCase; - testCase.SetQuery(R"( - SELECT - id, COUNT(level) - FROM `/Root/tableWithNulls` - WHERE id > 4 AND id < 7 - GROUP BY id - ORDER BY id; - )") - .SetExpectedReply("[[5;1u];[6;0u]]"); - testCase.FillExpectedAggregationGroupByPlanOptions(); - - TestTableWithNulls({ testCase }); - } - - Y_UNIT_TEST(Aggregation_Count_GroupByNull) { - // Wait for KIKIMR-16940 fix - return; - TAggregationTestCase testCase; - testCase.SetQuery(R"( - SELECT - level, COUNT(id), COUNT(level), COUNT(*) - FROM `/Root/tableWithNulls` - WHERE id > 5 - GROUP BY level - ORDER BY level; - )") - .SetExpectedReply("[[#;5u;0u;5u]]"); - testCase.FillExpectedAggregationGroupByPlanOptions(); - - TestTableWithNulls({ testCase }); - } - - Y_UNIT_TEST(Aggregation_Count_GroupByNullMix) { - // Wait for KIKIMR-16940 fix - return; - TAggregationTestCase testCase; - testCase.SetQuery(R"( - SELECT - level, COUNT(id), COUNT(level), COUNT(*) - FROM `/Root/tableWithNulls` - WHERE id >= 5 - GROUP BY level - ORDER BY level; - )") - .SetExpectedReply("[[#;5u;0u;5u];[[5];1u;1u;1u]]"); - testCase.FillExpectedAggregationGroupByPlanOptions(); - - TestTableWithNulls({ testCase }); - } - - Y_UNIT_TEST(Aggregation_NoPushdownOnDisabledEmitAggApply) { - TAggregationTestCase testCase; - testCase.SetQuery(R"( - PRAGMA DisableEmitAggApply; - SELECT - COUNT(level) - FROM `/Root/olapStore/olapTable` - )") - .SetExpectedReply("[[23000u;]]") - .AddExpectedPlanOptions("Condense1"); - - TestAggregations({ testCase }); - } - - Y_UNIT_TEST(AggregationAndFilterPushdownOnDiffCols) { - TAggregationTestCase testCase; - testCase.SetQuery(R"( - SELECT - COUNT(`timestamp`) - FROM `/Root/olapStore/olapTable` - WHERE level = 2 - )") - .SetExpectedReply("[[4600u;]]") -#if SSA_RUNTIME_VERSION >= 2U - .AddExpectedPlanOptions("TKqpOlapAgg") -#else - .AddExpectedPlanOptions("CombineCore") -#endif - .AddExpectedPlanOptions("KqpOlapFilter"); - - TestAggregations({ testCase 
}); - } - - Y_UNIT_TEST(Aggregation_Avg) { - TAggregationTestCase testCase; - testCase.SetQuery(R"( - SELECT - AVG(level), MIN(level) - FROM `/Root/olapStore/olapTable` - )") - .SetExpectedReply("[[[2.];[0]]]") -#if SSA_RUNTIME_VERSION >= 2U - .AddExpectedPlanOptions("TKqpOlapAgg"); -#else - .AddExpectedPlanOptions("CombineCore"); -#endif - - TestAggregations({ testCase }); - } - - Y_UNIT_TEST(Aggregation_Avg_Null) { - TAggregationTestCase testCase; - testCase.SetQuery(R"( - SELECT - AVG(level) - FROM `/Root/tableWithNulls` - WHERE id > 5; - )") - .SetExpectedReply("[[#]]") -#if SSA_RUNTIME_VERSION >= 2U - .AddExpectedPlanOptions("TKqpOlapAgg"); -#else - .AddExpectedPlanOptions("CombineCore"); -#endif - - TestTableWithNulls({ testCase }); - } - - Y_UNIT_TEST(Aggregation_Avg_NullMix) { - TAggregationTestCase testCase; - testCase.SetQuery(R"( - SELECT - AVG(level) - FROM `/Root/tableWithNulls`; - )") - .SetExpectedReply("[[[3.]]]") -#if SSA_RUNTIME_VERSION >= 2U - .AddExpectedPlanOptions("TKqpOlapAgg"); -#else - .AddExpectedPlanOptions("CombineCore"); -#endif - - TestTableWithNulls({ testCase }); - } - - Y_UNIT_TEST(Aggregation_Avg_GroupBy) { - TAggregationTestCase testCase; - testCase.SetQuery(R"( - SELECT - id, AVG(level) - FROM `/Root/tableWithNulls` - WHERE id BETWEEN 4 AND 5 - GROUP BY id - ORDER BY id; - )") - .SetExpectedReply("[[4;[4.]];[5;[5.]]]"); - testCase.FillExpectedAggregationGroupByPlanOptions(); - - TestTableWithNulls({ testCase }); - } - - Y_UNIT_TEST(Aggregation_Avg_NullGroupBy) { - TAggregationTestCase testCase; - testCase.SetQuery(R"( - SELECT - id, AVG(level) - FROM `/Root/tableWithNulls` - WHERE id BETWEEN 6 AND 7 - GROUP BY id - ORDER BY id; - )") - .SetExpectedReply("[[6;#];[7;#]]"); - testCase.FillExpectedAggregationGroupByPlanOptions(); - - TestTableWithNulls({ testCase }); - } - - Y_UNIT_TEST(Aggregation_Avg_NullMixGroupBy) { - TAggregationTestCase testCase; - testCase.SetQuery(R"( - SELECT - id, AVG(level) - FROM `/Root/tableWithNulls` - 
WHERE id > 4 AND id < 7 - GROUP BY id - ORDER BY id; - )") - .SetExpectedReply("[[5;[5.]];[6;#]]"); - testCase.FillExpectedAggregationGroupByPlanOptions(); - - TestTableWithNulls({ testCase }); - } - - Y_UNIT_TEST(Aggregation_Avg_GroupByNull) { - TAggregationTestCase testCase; - testCase.SetQuery(R"( - SELECT - level, AVG(id), AVG(level) - FROM `/Root/tableWithNulls` - WHERE id > 5 - GROUP BY level - ORDER BY level; - )") - .SetExpectedReply("[[#;8.;#]]"); - testCase.FillExpectedAggregationGroupByPlanOptions(); - - TestTableWithNulls({ testCase }); - } - - Y_UNIT_TEST(Aggregation_Avg_GroupByNullMix) { - TAggregationTestCase testCase; - testCase.SetQuery(R"( - SELECT - level, AVG(id), AVG(level) - FROM `/Root/tableWithNulls` - WHERE id >= 5 - GROUP BY level - ORDER BY level; - )") - .SetExpectedReply("[[#;8.;#];[[5];5.;[5.]]]"); - testCase.FillExpectedAggregationGroupByPlanOptions(); - - TestTableWithNulls({ testCase }); - } - - Y_UNIT_TEST(Aggregation_Sum) { - TAggregationTestCase testCase; - testCase.SetQuery(R"( - SELECT - SUM(level) - FROM `/Root/olapStore/olapTable` - )") - .SetExpectedReply("[[[46000;]]]") -#if SSA_RUNTIME_VERSION >= 2U - .AddExpectedPlanOptions("TKqpOlapAgg"); -#else - .AddExpectedPlanOptions("CombineCore"); -#endif - - TestAggregations({ testCase }); - } - - Y_UNIT_TEST(Aggregation_Sum_Null) { - TAggregationTestCase testCase; - testCase.SetQuery(R"( - SELECT - SUM(level) - FROM `/Root/tableWithNulls` - WHERE id > 5; - )") - .SetExpectedReply("[[#]]") -#if SSA_RUNTIME_VERSION >= 2U - .AddExpectedPlanOptions("TKqpOlapAgg"); -#else - .AddExpectedPlanOptions("CombineCore"); -#endif - - TestTableWithNulls({ testCase }); - } - - Y_UNIT_TEST(Aggregation_Sum_NullMix) { - TAggregationTestCase testCase; - testCase.SetQuery(R"( - SELECT - SUM(level) - FROM `/Root/tableWithNulls`; - )") - .SetExpectedReply("[[[15]]]") -#if SSA_RUNTIME_VERSION >= 2U - .AddExpectedPlanOptions("TKqpOlapAgg"); -#else - .AddExpectedPlanOptions("CombineCore"); -#endif - - 
TestTableWithNulls({ testCase }); - } - - Y_UNIT_TEST(Aggregation_Sum_GroupBy) { - TAggregationTestCase testCase; - testCase.SetQuery(R"( - SELECT - id, SUM(level) - FROM `/Root/tableWithNulls` - WHERE id BETWEEN 4 AND 5 - GROUP BY id - ORDER BY id; - )") - .SetExpectedReply("[[4;[4]];[5;[5]]]"); - testCase.FillExpectedAggregationGroupByPlanOptions(); - - TestTableWithNulls({ testCase }); - } - - Y_UNIT_TEST(Aggregation_Sum_NullGroupBy) { - TAggregationTestCase testCase; - testCase.SetQuery(R"( - SELECT - id, SUM(level) - FROM `/Root/tableWithNulls` - WHERE id BETWEEN 6 AND 7 - GROUP BY id - ORDER BY id; - )") - .SetExpectedReply("[[6;#];[7;#]]"); - testCase.FillExpectedAggregationGroupByPlanOptions(); - - TestTableWithNulls({ testCase }); - } - - Y_UNIT_TEST(Aggregation_Sum_NullMixGroupBy) { - TAggregationTestCase testCase; - testCase.SetQuery(R"( - SELECT - id, SUM(level) - FROM `/Root/tableWithNulls` - WHERE id > 4 AND id < 7 - GROUP BY id - ORDER BY id; - )") - .SetExpectedReply("[[5;[5]];[6;#]]"); - testCase.FillExpectedAggregationGroupByPlanOptions(); - - TestTableWithNulls({ testCase }); - } - - Y_UNIT_TEST(Aggregation_Sum_GroupByNull) { - TAggregationTestCase testCase; - testCase.SetQuery(R"( - SELECT - level, SUM(id), SUM(level) - FROM `/Root/tableWithNulls` - WHERE id > 5 - GROUP BY level - ORDER BY level; - )") - .SetExpectedReply("[[#;40;#]]"); - testCase.FillExpectedAggregationGroupByPlanOptions(); - - TestTableWithNulls({ testCase }); - } - - Y_UNIT_TEST(Aggregation_Sum_GroupByNullMix) { - TAggregationTestCase testCase; - testCase.SetQuery(R"( - SELECT - level, SUM(id), SUM(level) - FROM `/Root/tableWithNulls` - WHERE id >= 5 - GROUP BY level - ORDER BY level; - )") - .SetExpectedReply("[[#;40;#];[[5];5;[5]]]"); - testCase.FillExpectedAggregationGroupByPlanOptions(); - - TestTableWithNulls({ testCase }); - } - - Y_UNIT_TEST(Aggregation_SumL_GroupL_OrderL) { - TAggregationTestCase testCase; - testCase.SetQuery(R"( - SELECT - level, SUM(level) - FROM 
`/Root/olapStore/olapTable` - GROUP BY level - ORDER BY level - )") - .SetExpectedReply("[[[0];[0]];[[1];[4600]];[[2];[9200]];[[3];[13800]];[[4];[18400]]]"); - testCase.FillExpectedAggregationGroupByPlanOptions(); - - TestAggregations({ testCase }); - } - - Y_UNIT_TEST(Aggregation_MinL) { - TAggregationTestCase testCase; - testCase.SetQuery(R"( - SELECT - MIN(level) - FROM `/Root/olapStore/olapTable` - )") - .SetExpectedReply("[[[0]]]") -#if SSA_RUNTIME_VERSION >= 2U - .AddExpectedPlanOptions("TKqpOlapAgg"); -#else - .AddExpectedPlanOptions("CombineCore"); -#endif - - TestAggregations({ testCase }); - } - - Y_UNIT_TEST(Aggregation_MaxL) { - TAggregationTestCase testCase; - testCase.SetQuery(R"( - SELECT - MAX(level) - FROM `/Root/olapStore/olapTable` - )") - .SetExpectedReply("[[[4]]]") -#if SSA_RUNTIME_VERSION >= 2U - .AddExpectedPlanOptions("TKqpOlapAgg"); -#else - .AddExpectedPlanOptions("CombineCore"); -#endif - - TestAggregations({ testCase }); - } - - Y_UNIT_TEST(Aggregation_MinR_GroupL_OrderL) { - TAggregationTestCase testCase; - testCase.SetQuery(R"( - SELECT - level, MIN(resource_id) - FROM `/Root/olapStore/olapTable` - GROUP BY level - ORDER BY level - )") - .SetExpectedReply("[[[0];[\"10000\"]];[[1];[\"10001\"]];[[2];[\"10002\"]];[[3];[\"10003\"]];[[4];[\"10004\"]]]"); - testCase.FillExpectedAggregationGroupByPlanOptions(); - - TestAggregations({ testCase }); - } - - Y_UNIT_TEST(Aggregation_MaxR_GroupL_OrderL) { - TAggregationTestCase testCase; - testCase.SetQuery(R"( - SELECT - level, MAX(resource_id) - FROM `/Root/olapStore/olapTable` - GROUP BY level - ORDER BY level - )") - .SetExpectedReply("[[[0];[\"40995\"]];[[1];[\"40996\"]];[[2];[\"40997\"]];[[3];[\"40998\"]];[[4];[\"40999\"]]]"); - testCase.FillExpectedAggregationGroupByPlanOptions(); - - TestAggregations({ testCase }); - } - - Y_UNIT_TEST(Aggregation_ProjectionOrder) { - TAggregationTestCase testCase; - testCase.SetQuery(R"( - SELECT - resource_id, level, count(*) as c - FROM 
`/Root/olapStore/olapTable` - GROUP BY resource_id, level - ORDER BY c, resource_id DESC LIMIT 3 - )") - .SetExpectedReply("[[[\"40999\"];[4];1u];[[\"40998\"];[3];1u];[[\"40997\"];[2];1u]]") - .SetExpectedReadNodeType("Aggregate-TableFullScan"); - testCase.FillExpectedAggregationGroupByPlanOptions(); - TestAggregations({ testCase }); - } - - Y_UNIT_TEST(Aggregation_Some) { - TAggregationTestCase testCase; - testCase.SetQuery(R"( - SELECT SOME(level) FROM `/Root/tableWithNulls` WHERE id=1 - )") - .SetExpectedReply("[[[1]]]") -#if SSA_RUNTIME_VERSION >= 2U - .AddExpectedPlanOptions("TKqpOlapAgg"); -#else - .AddExpectedPlanOptions("CombineCore"); -#endif - TestTableWithNulls({ testCase }); - } - - Y_UNIT_TEST(Aggregation_Some_Null) { - TAggregationTestCase testCase; - testCase.SetQuery(R"( - SELECT SOME(level) FROM `/Root/tableWithNulls` WHERE id > 5 - )") - .SetExpectedReply("[[#]]") -#if SSA_RUNTIME_VERSION >= 2U - .AddExpectedPlanOptions("TKqpOlapAgg"); -#else - .AddExpectedPlanOptions("CombineCore"); -#endif - TestTableWithNulls({ testCase }); - } - - Y_UNIT_TEST(Aggregation_Some_GroupBy) { - TAggregationTestCase testCase; - testCase.SetQuery(R"( - SELECT - id, SOME(level) - FROM `/Root/tableWithNulls` - WHERE id BETWEEN 4 AND 5 - GROUP BY id - ORDER BY id; - )") - .SetExpectedReply("[[4;[4]];[5;[5]]]"); - testCase.FillExpectedAggregationGroupByPlanOptions(); - - TestTableWithNulls({ testCase }); - } - - Y_UNIT_TEST(Aggregation_Some_NullGroupBy) { - TAggregationTestCase testCase; - testCase.SetQuery(R"( - SELECT - id, SOME(level) - FROM `/Root/tableWithNulls` - WHERE id BETWEEN 6 AND 7 - GROUP BY id - ORDER BY id; - )") - .SetExpectedReply("[[6;#];[7;#]]"); - testCase.FillExpectedAggregationGroupByPlanOptions(); - - TestTableWithNulls({ testCase }); - } - - Y_UNIT_TEST(Aggregation_Some_NullMixGroupBy) { - TAggregationTestCase testCase; - testCase.SetQuery(R"( - SELECT - id, SOME(level) - FROM `/Root/tableWithNulls` - WHERE id > 4 AND id < 7 - GROUP BY id - ORDER 
BY id; - )") - .SetExpectedReply("[[5;[5]];[6;#]]"); - testCase.FillExpectedAggregationGroupByPlanOptions(); - - TestTableWithNulls({ testCase }); - } - - Y_UNIT_TEST(Aggregation_Some_GroupByNullMix) { - TAggregationTestCase testCase; - testCase.SetQuery(R"( - SELECT - level, SOME(id), SOME(level) - FROM `/Root/tableWithNulls` - WHERE id BETWEEN 5 AND 6 - GROUP BY level - ORDER BY level; - )") - .SetExpectedReply("[[#;6;#];[[5];5;[5]]]"); - testCase.FillExpectedAggregationGroupByPlanOptions(); - - TestTableWithNulls({ testCase }); - } - - Y_UNIT_TEST(Aggregation_Some_GroupByNull) { - TAggregationTestCase testCase; - testCase.SetQuery(R"( - SELECT - level, SOME(id), SOME(level) - FROM `/Root/tableWithNulls` - WHERE id = 6 - GROUP BY level - ORDER BY level; - )") - .SetExpectedReply("[[#;6;#]]"); - testCase.FillExpectedAggregationGroupByPlanOptions(); - - TestTableWithNulls({ testCase }); - } - - Y_UNIT_TEST(ClickBenchSmoke) { - TAggregationTestCase q7; - q7.SetQuery(R"( - SELECT - AdvEngineID, COUNT(*) as c - FROM `/Root/benchTable` - WHERE AdvEngineID != 0 - GROUP BY AdvEngineID - ORDER BY c DESC - )") - //.SetExpectedReply("[[[\"40999\"];[4];1u];[[\"40998\"];[3];1u];[[\"40997\"];[2];1u]]") - // Should be fixed in https://st.yandex-team.ru/KIKIMR-17009 - // .SetExpectedReadNodeType("TableFullScan"); - .SetExpectedReadNodeType("Aggregate-TableFullScan"); - q7.FillExpectedAggregationGroupByPlanOptions(); - - TAggregationTestCase q9; - q9.SetQuery(R"( - SELECT - RegionID, SUM(AdvEngineID), COUNT(*) AS c, avg(ResolutionWidth), COUNT(DISTINCT UserID) - FROM `/Root/benchTable` - GROUP BY RegionID - ORDER BY c DESC - LIMIT 10 - )") - //.SetExpectedReply("[[[\"40999\"];[4];1u];[[\"40998\"];[3];1u];[[\"40997\"];[2];1u]]") - // Should be fixed in https://st.yandex-team.ru/KIKIMR-17009 - // .SetExpectedReadNodeType("TableFullScan"); - .SetExpectedReadNodeType("Aggregate-TableFullScan"); - q9.FillExpectedAggregationGroupByPlanOptions(); - - TAggregationTestCase q12; - 
q12.SetQuery(R"( - SELECT - SearchPhrase, count(*) AS c - FROM `/Root/benchTable` - WHERE SearchPhrase != '' - GROUP BY SearchPhrase - ORDER BY c DESC - LIMIT 10; - )") - //.SetExpectedReply("[[[\"40999\"];[4];1u];[[\"40998\"];[3];1u];[[\"40997\"];[2];1u]]") - // Should be fixed in https://st.yandex-team.ru/KIKIMR-17009 - // .SetExpectedReadNodeType("TableFullScan"); - .SetExpectedReadNodeType("Aggregate-TableFullScan"); - q12.FillExpectedAggregationGroupByPlanOptions(); - - TAggregationTestCase q14; - q14.SetQuery(R"( - SELECT - SearchEngineID, SearchPhrase, count(*) AS c - FROM `/Root/benchTable` - WHERE SearchPhrase != '' - GROUP BY SearchEngineID, SearchPhrase - ORDER BY c DESC - LIMIT 10; - )") - //.SetExpectedReply("[[[\"40999\"];[4];1u];[[\"40998\"];[3];1u];[[\"40997\"];[2];1u]]") - // Should be fixed in https://st.yandex-team.ru/KIKIMR-17009 - // .SetExpectedReadNodeType("TableFullScan"); - .SetExpectedReadNodeType("Aggregate-TableFullScan"); - q14.FillExpectedAggregationGroupByPlanOptions(); - - TAggregationTestCase q22; - q22.SetQuery(R"( - SELECT - SearchPhrase, MIN(URL), MIN(Title), COUNT(*) AS c, COUNT(DISTINCT UserID) - FROM `/Root/benchTable` - WHERE Title LIKE '%Google%' AND URL NOT LIKE '%.google.%' AND SearchPhrase <> '' - GROUP BY SearchPhrase - ORDER BY c DESC - LIMIT 10; - )") - .AddExpectedPlanOptions("KqpOlapFilter") - .SetExpectedReadNodeType("Aggregate-TableFullScan"); - q22.FillExpectedAggregationGroupByPlanOptions(); - - TAggregationTestCase q39; - q39.SetQuery(R"( - SELECT TraficSourceID, SearchEngineID, AdvEngineID, Src, Dst, COUNT(*) AS PageViews - FROM `/Root/benchTable` - WHERE CounterID = 62 AND EventDate >= Date('2013-07-01') AND EventDate <= Date('2013-07-31') AND IsRefresh == 0 - GROUP BY - TraficSourceID, SearchEngineID, AdvEngineID, IF (SearchEngineID = 0 AND AdvEngineID = 0, Referer, '') AS Src, - URL AS Dst - ORDER BY PageViews DESC - LIMIT 10; - )") - .AddExpectedPlanOptions("KqpOlapFilter") - 
.SetExpectedReadNodeType("Aggregate-Filter-TableFullScan"); - q39.FillExpectedAggregationGroupByPlanOptions(); - - std::vector cases = {q7, q9, q12, q14, q22, q39}; - for (auto&& c : cases) { - c.SetUseLlvm(NSan::PlainOrUnderSanitizer(true, false)); - } - - TestClickBench(cases); - } - - Y_UNIT_TEST(StatsSysView) { - auto settings = TKikimrSettings() - .SetWithSampleTables(false); - TKikimrRunner kikimr(settings); - auto csController = NYDBTest::TControllers::RegisterCSControllerGuard(); - static ui32 numKinds = 2; - - TLocalHelper(kikimr).CreateTestOlapTable(); - for (ui64 i = 0; i < 100; ++i) { - WriteTestData(kikimr, "/Root/olapStore/olapTable", 0, 1000000 + i*10000, 1000); - } - - auto tableClient = kikimr.GetTableClient(); - auto selectQuery = TString(R"( - SELECT PathId, Kind, TabletId, Sum(Rows) as Rows - FROM `/Root/olapStore/.sys/store_primary_index_stats` - GROUP BY PathId, Kind, TabletId - ORDER BY TabletId, Kind, PathId - )"); - - auto rows = ExecuteScanQuery(tableClient, selectQuery); + PRAGMA DisableAnsiLike; + SELECT id, resource_id FROM `/Root/tableWithNulls` WHERE resource_id LIKE "%5%" + )"; + auto it = tableClient.StreamExecuteScanQuery(query, scanSettings).GetValueSync(); + UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); - UNIT_ASSERT_VALUES_EQUAL(rows.size(), numKinds*3); - UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows[0].at("PathId")), 3ull); - UNIT_ASSERT_VALUES_EQUAL(GetUtf8(rows[0].at("Kind")), "INSERTED"); - UNIT_ASSERT_GE(GetUint64(rows[0].at("TabletId")), 72075186224037888ull); - UNIT_ASSERT_GE(GetUint64(rows[2].at("TabletId")), 72075186224037889ull); - UNIT_ASSERT_GE(GetUint64(rows[4].at("TabletId")), 72075186224037890ull); - UNIT_ASSERT_GE(GetUint64(rows[1].at("TabletId")), GetUint64(rows[0].at("TabletId"))); - UNIT_ASSERT_VALUES_EQUAL(GetUtf8(rows[2].at("Kind")), "INSERTED"); - UNIT_ASSERT_GE(GetUint64(rows[2].at("TabletId")), GetUint64(rows[1].at("TabletId"))); - UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows[3].at("PathId")), 3ull); - 
UNIT_ASSERT_VALUES_EQUAL(GetUtf8(rows[3].at("Kind")), "SPLIT_COMPACTED"); - UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows[3].at("TabletId")), GetUint64(rows[2].at("TabletId"))); - UNIT_ASSERT_VALUES_EQUAL(GetUtf8(rows[4].at("Kind")), "INSERTED"); - UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows[4].at("TabletId")), GetUint64(rows[5].at("TabletId"))); - UNIT_ASSERT_GE( - GetUint64(rows[0].at("Rows")) + GetUint64(rows[1].at("Rows")) + GetUint64(rows[2].at("Rows")) + - GetUint64(rows[3].at("Rows")) + GetUint64(rows[4].at("Rows")) + GetUint64(rows[5].at("Rows")), - 0.3*0.9*100*1000); // >= 90% of 100K inserted rows + auto result = CollectStreamResult(it); + auto ast = result.QueryStats->Getquery_ast(); + UNIT_ASSERT_C(ast.find("KqpOlapFilter") == std::string::npos, + TStringBuilder() << "Predicate pushed down. Query: " << query); } - Y_UNIT_TEST(StatsSysViewTable) { + Y_UNIT_TEST(PredicatePushdown_MixStrictAndNotStrict) { auto settings = TKikimrSettings() .SetWithSampleTables(false); TKikimrRunner kikimr(settings); - auto csController = NYDBTest::TControllers::RegisterCSControllerGuard(); - static ui32 numKinds = 5; - - TLocalHelper(kikimr).CreateTestOlapTable("olapTable_1"); - TLocalHelper(kikimr).CreateTestOlapTable("olapTable_2"); - for (ui64 i = 0; i < 10; ++i) { - WriteTestData(kikimr, "/Root/olapStore/olapTable_1", 0, 1000000 + i*10000, 1000); - WriteTestData(kikimr, "/Root/olapStore/olapTable_2", 0, 1000000 + i*10000, 2000); - } - auto tableClient = kikimr.GetTableClient(); - { - auto selectQuery = TString(R"( - SELECT PathId, Kind, TabletId - FROM `/Root/olapStore/olapTable_1/.sys/primary_index_stats` - GROUP BY PathId, TabletId, Kind - ORDER BY PathId, TabletId, Kind - )"); - - auto rows = ExecuteScanQuery(tableClient, selectQuery); - - UNIT_ASSERT_GT(rows.size(), 1*numKinds); - UNIT_ASSERT_LE(rows.size(), 3*numKinds); - UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows.front().at("PathId")), 3ull); - UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows.back().at("PathId")), 3ull); - } - { - auto 
selectQuery = TString(R"( - SELECT PathId, Kind, TabletId - FROM `/Root/olapStore/olapTable_2/.sys/primary_index_stats` - GROUP BY PathId, TabletId, Kind - ORDER BY PathId, TabletId, Kind - )"); - - auto rows = ExecuteScanQuery(tableClient, selectQuery); - - UNIT_ASSERT_GT(rows.size(), 1*numKinds); - UNIT_ASSERT_LE(rows.size(), 3*numKinds); - UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows.front().at("PathId")), 4ull); - UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows.back().at("PathId")), 4ull); - } - { - auto selectQuery = TString(R"( - SELECT * - FROM `/Root/olapStore/olapTable_1/.sys/primary_index_stats` - WHERE - PathId > UInt64("3") - ORDER BY PathId, Kind, TabletId - )"); + TStreamExecScanQuerySettings scanSettings; + scanSettings.Explain(true); - auto rows = ExecuteScanQuery(tableClient, selectQuery); + TLocalHelper(kikimr).CreateTestOlapTable(); + WriteTestData(kikimr, "/Root/olapStore/olapTable", 10000, 3000000, 5); + Tests::NCommon::TLoggerInit(kikimr).Initialize(); - UNIT_ASSERT_VALUES_EQUAL(rows.size(), 0); - } - } + auto tableClient = kikimr.GetTableClient(); + auto query = R"( + PRAGMA Kikimr.OptEnablePredicateExtract = "false"; + SELECT `timestamp` FROM `/Root/olapStore/olapTable` WHERE + `resource_id` = "10001" AND Unwrap(`level`/1) = `level` AND `level` > 1; + )"; - Y_UNIT_TEST(StatsSysViewEnumStringBytes) { - ui64 rawBytesPK1; - ui64 bytesPK1; - auto csController = NYDBTest::TControllers::RegisterCSControllerGuard(); - { - auto settings = TKikimrSettings() - .SetWithSampleTables(false); - TKikimrRunner kikimr(settings); - Tests::NCommon::TLoggerInit(kikimr).Initialize(); - TTypedLocalHelper helper("", kikimr, "olapTable", "olapStore12"); - helper.CreateTestOlapTable(); - helper.FillPKOnly(0, 800000); - helper.GetVolumes(rawBytesPK1, bytesPK1, false); - } + auto it = tableClient.StreamExecuteScanQuery(query, scanSettings).GetValueSync(); + UNIT_ASSERT_C(it.IsSuccess(), it.GetIssues().ToString()); - ui64 rawBytesUnpack1PK = 0; - ui64 bytesUnpack1PK = 0; - ui64 
rawBytesPackAndUnpack2PK; - ui64 bytesPackAndUnpack2PK; - const ui32 rowsCount = 800000; - const ui32 groupsCount = 512; - { - auto settings = TKikimrSettings() - .SetWithSampleTables(false); - TKikimrRunner kikimr(settings); - Tests::NCommon::TLoggerInit(kikimr).Initialize(); - TTypedLocalHelper helper("Utf8", kikimr); - helper.CreateTestOlapTable(); - NArrow::NConstruction::TStringPoolFiller sPool(groupsCount, 52); - helper.FillTable(sPool, 0, rowsCount); - helper.PrintCount(); - { - auto d = helper.GetDistribution(); - Y_ABORT_UNLESS(d.GetCount() == rowsCount); - Y_ABORT_UNLESS(d.GetGroupsCount() == groupsCount); - Y_ABORT_UNLESS(d.GetMaxCount() - d.GetMinCount() <= 1); - } - helper.GetVolumes(rawBytesUnpack1PK, bytesUnpack1PK, false); - Sleep(TDuration::Seconds(5)); - auto tableClient = kikimr.GetTableClient(); - { - auto alterQuery = TStringBuilder() << "ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=ALTER_COLUMN, NAME=field, `ENCODING.DICTIONARY.ENABLED`=`true`);"; - auto session = tableClient.CreateSession().GetValueSync().GetSession(); - auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); - UNIT_ASSERT_VALUES_EQUAL_C(alterResult.GetStatus(), EStatus::SUCCESS, alterResult.GetIssues().ToString()); - } - { - auto alterQuery = TStringBuilder() << "ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=ALTER_COLUMN, NAME=field1, `ENCODING.DICTIONARY.ENABLED`=`true`);"; - auto session = tableClient.CreateSession().GetValueSync().GetSession(); - auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); - UNIT_ASSERT_VALUES_EQUAL_C(alterResult.GetStatus(), EStatus::SCHEME_ERROR, alterResult.GetIssues().ToString()); - } - { - auto alterQuery = TStringBuilder() << "ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=ALTER_COLUMN, NAME=field, `ENCODING.DICTIONARY.ENABLED1`=`true`);"; - auto session = tableClient.CreateSession().GetValueSync().GetSession(); - auto alterResult = 
session.ExecuteSchemeQuery(alterQuery).GetValueSync(); - UNIT_ASSERT_VALUES_EQUAL_C(alterResult.GetStatus(), EStatus::GENERIC_ERROR, alterResult.GetIssues().ToString()); - } - Sleep(TDuration::Seconds(5)); - helper.FillTable(sPool, 1, rowsCount); - Sleep(TDuration::Seconds(5)); - { - helper.GetVolumes(rawBytesPackAndUnpack2PK, bytesPackAndUnpack2PK, false); - helper.PrintCount(); - { - auto d = helper.GetDistribution(); - Cerr << d.DebugString() << Endl; - Y_ABORT_UNLESS(d.GetCount() == 2 * rowsCount); - Y_ABORT_UNLESS(d.GetGroupsCount() == groupsCount); - Y_ABORT_UNLESS(d.GetMaxCount() - d.GetMinCount() <= 2); - } - } - { - auto alterQuery = TStringBuilder() << "ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=ALTER_COLUMN, NAME=field, `SERIALIZER.CLASS_NAME`=`ARROW_SERIALIZER`, `COMPRESSION.TYPE`=`zstd`);"; - auto session = tableClient.CreateSession().GetValueSync().GetSession(); - auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); - UNIT_ASSERT_VALUES_EQUAL_C(alterResult.GetStatus(), EStatus::SUCCESS, alterResult.GetIssues().ToString()); - } - } - const ui64 rawBytesUnpack = rawBytesUnpack1PK - rawBytesPK1; - const ui64 bytesUnpack = bytesUnpack1PK - bytesPK1; - const ui64 rawBytesPack = rawBytesPackAndUnpack2PK - rawBytesUnpack1PK - rawBytesPK1; - const ui64 bytesPack = bytesPackAndUnpack2PK - bytesUnpack1PK - bytesPK1; - TStringBuilder result; - result << "unpacked data: " << rawBytesUnpack << " / " << bytesUnpack << Endl; - result << "packed data: " << rawBytesPack << " / " << bytesPack << Endl; - result << "frq_diff: " << 1.0 * bytesPack / bytesUnpack << Endl; - result << "frq_compression: " << 1.0 * bytesPack / rawBytesPack << Endl; - result << "pk_size : " << rawBytesPK1 << " / " << bytesPK1 << Endl; - Cerr << result << Endl; - Y_ABORT_UNLESS(bytesPack / bytesUnpack < 0.1); + auto result = CollectStreamResult(it); + auto ast = result.QueryStats->Getquery_ast(); + UNIT_ASSERT_C(ast.find(R"(('eq '"resource_id")") != 
std::string::npos, + TStringBuilder() << "Predicate not pushed down. Query: " << query); + UNIT_ASSERT_C(ast.find(R"(('gt '"level")") == std::string::npos, + TStringBuilder() << "Predicate pushed down. Query: " << query); + UNIT_ASSERT_C(ast.find("NarrowMap") != std::string::npos, + TStringBuilder() << "NarrowMap was removed. Query: " << query); } Y_UNIT_TEST(SelectLimit1ManyShards) { @@ -3837,7 +1344,7 @@ Y_UNIT_TEST_SUITE(KqpOlap) { const ui32 numIterations = 10; TLocalHelper(*server).CreateTestOlapTable("selectTable", "selectStore", numShards, numShards); for(ui64 i = 0; i < numIterations; ++i) { - TLocalHelper(*server).SendDataViaActorSystem("/Root/selectStore/selectTable", 0, 1000000 + i*1000000, 2000); + TLocalHelper(*server).SendDataViaActorSystem("/Root/selectStore/selectTable", 0, 1000000 + i * 1000000, 2000); } ui64 result = 0; @@ -3899,7 +1406,7 @@ Y_UNIT_TEST_SUITE(KqpOlap) { runtime->SetObserverFunc(captureEvents); auto streamSender = runtime->AllocateEdgeActor(); - SendRequest(*runtime, streamSender, MakeStreamRequest(streamSender, "SELECT * FROM `/Root/selectStore/selectTable` LIMIT 1;", false)); + NDataShard::NKqpHelpers::SendRequest(*runtime, streamSender, NDataShard::NKqpHelpers::MakeStreamRequest(streamSender, "SELECT * FROM `/Root/selectStore/selectTable` LIMIT 1;", false)); auto ev = runtime->GrabEdgeEventRethrow(streamSender); UNIT_ASSERT_VALUES_EQUAL(result, 1); } @@ -3965,7 +1472,7 @@ Y_UNIT_TEST_SUITE(KqpOlap) { runtime->SetObserverFunc(captureEvents); auto streamSender = runtime->AllocateEdgeActor(); - SendRequest(*runtime, streamSender, MakeStreamRequest(streamSender, "SELECT COUNT(*) FROM `/Root/largeOlapStore/largeOlapTable`;", false)); + NDataShard::NKqpHelpers::SendRequest(*runtime, streamSender, NDataShard::NKqpHelpers::MakeStreamRequest(streamSender, "SELECT COUNT(*) FROM `/Root/largeOlapStore/largeOlapTable`;", false)); runtime->GrabEdgeEventRethrow(streamSender); UNIT_ASSERT_VALUES_EQUAL(result, insertRows); } @@ -4028,7 +1535,7 
@@ Y_UNIT_TEST_SUITE(KqpOlap) { runtime->SetObserverFunc(captureEvents); auto streamSender = runtime->AllocateEdgeActor(); - SendRequest(*runtime, streamSender, MakeStreamRequest(streamSender, "SELECT * FROM `/Root/largeOlapStore/largeOlapTable` where resource_id = Utf8(\"notfound\");", false)); + NDataShard::NKqpHelpers::SendRequest(*runtime, streamSender, NDataShard::NKqpHelpers::MakeStreamRequest(streamSender, "SELECT * FROM `/Root/largeOlapStore/largeOlapTable` where resource_id = Utf8(\"notfound\");", false)); auto ev = runtime->GrabEdgeEventRethrow(streamSender); UNIT_ASSERT(hasResult); } @@ -4103,10 +1610,12 @@ Y_UNIT_TEST_SUITE(KqpOlap) { return TTestActorRuntime::EEventAction::PROCESS; } else { if (prevIsFinished) { - Cerr << (TStringBuilder() << "-- EvScanData from " << ev->Sender << ": hijack event"); + Cerr << (TStringBuilder() << "-- EvScanData from " << ev->Sender << ": hijack event" << Endl); Cerr.Flush(); - auto resp = std::make_unique(msg->Generation, 0); - runtime->Send(new IEventHandle(ev->Recipient, ev->Sender, resp.release())); + for (auto&& i : csController->GetShardActualIds()) { + runtime->Send(MakePipePeNodeCacheID(false), NActors::TActorId(), new TEvPipeCache::TEvForward( + new TEvents::TEvPoisonPill(), i, false)); + } } else { prevIsFinished = msg->Finished; } @@ -4123,384 +1632,11 @@ Y_UNIT_TEST_SUITE(KqpOlap) { runtime->SetObserverFunc(captureEvents); auto streamSender = runtime->AllocateEdgeActor(); - SendRequest(*runtime, streamSender, MakeStreamRequest(streamSender, "SELECT COUNT(*) FROM `/Root/largeOlapStore/largeOlapTable`;", false)); + NDataShard::NKqpHelpers::SendRequest(*runtime, streamSender, NDataShard::NKqpHelpers::MakeStreamRequest(streamSender, "SELECT COUNT(*) FROM `/Root/largeOlapStore/largeOlapTable`;", false)); auto ev = runtime->GrabEdgeEventRethrow(streamSender); UNIT_ASSERT_VALUES_EQUAL(result, insertRows); } - Y_UNIT_TEST(StatsSysViewColumns) { - auto settings = TKikimrSettings().SetWithSampleTables(false); - auto 
csController = NYDBTest::TControllers::RegisterCSControllerGuard(); - TKikimrRunner kikimr(settings); - - TLocalHelper(kikimr.GetTestServer()).CreateTestOlapTable(); - for (ui64 i = 0; i < 10; ++i) { - WriteTestData(kikimr, "/Root/olapStore/olapTable", 0, 1000000 + i*10000, 2000); - } - - auto tableClient = kikimr.GetTableClient(); - - { - auto selectQuery = TString(R"( - SELECT TabletId, PathId, Kind - FROM `/Root/olapStore/.sys/store_primary_index_stats` - ORDER BY PathId, Kind, TabletId - LIMIT 4; - )"); - - auto rows = ExecuteScanQuery(tableClient, selectQuery); - - UNIT_ASSERT_VALUES_EQUAL(rows.size(), 4); - UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows[0].at("PathId")), 3ull); - UNIT_ASSERT_VALUES_EQUAL(GetUtf8(rows[0].at("Kind")), "INSERTED"); - UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows[3].at("PathId")), 3ull); - UNIT_ASSERT_VALUES_EQUAL(GetUtf8(rows[3].at("Kind")), "INSERTED"); - } - { - auto selectQuery = TString(R"( - SELECT SUM(BlobRangeSize) as Bytes, SUM(Rows) as Rows, PathId, TabletId - FROM `/Root/olapStore/.sys/store_primary_index_stats` - GROUP BY PathId, TabletId - ORDER BY Bytes - )"); - - auto rows = ExecuteScanQuery(tableClient, selectQuery); - - UNIT_ASSERT_VALUES_EQUAL(rows.size(), 3); - UNIT_ASSERT_LE(GetUint64(rows[0].at("Bytes")), GetUint64(rows[1].at("Bytes"))); - } - { - auto selectQuery = TString(R"( - SELECT Sum(Rows) as Rows, Kind, Sum(RawBytes) as RawBytes, Sum(Rows) as Rows2, Sum(Rows) as Rows3, PathId - FROM `/Root/olapStore/.sys/store_primary_index_stats` - GROUP BY Kind, PathId - ORDER BY PathId, Kind, Rows3 - )"); - - auto rows = ExecuteScanQuery(tableClient, selectQuery); - UNIT_ASSERT_VALUES_EQUAL(rows.size(), 2); - UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows[0].at("Rows2")), GetUint64(rows[0].at("Rows3"))); - UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows[1].at("Rows")), GetUint64(rows[1].at("Rows3"))); - } - } - - Y_UNIT_TEST(StatsSysViewRanges) { - auto settings = TKikimrSettings().SetWithSampleTables(false); - TKikimrRunner kikimr(settings); 
- auto csController = NYDBTest::TControllers::RegisterCSControllerGuard(); - csController->SetCompactionControl(NYDBTest::EOptimizerCompactionWeightControl::Disable); - Tests::NCommon::TLoggerInit(kikimr).Initialize(); - - TLocalHelper(kikimr).CreateTestOlapTable("olapTable_1"); - TLocalHelper(kikimr).CreateTestOlapTable("olapTable_2"); - TLocalHelper(kikimr).CreateTestOlapTable("olapTable_3"); - - for (ui64 i = 0; i < 10; ++i) { - WriteTestData(kikimr, "/Root/olapStore/olapTable_1", 0, 1000000 + i*10000, 2000); - WriteTestData(kikimr, "/Root/olapStore/olapTable_2", 0, 1000000 + i*10000, 3000); - WriteTestData(kikimr, "/Root/olapStore/olapTable_3", 0, 1000000 + i*10000, 5000); - } - - auto tableClient = kikimr.GetTableClient(); - - { - auto selectQuery = TString(R"( - SELECT * - FROM `/Root/olapStore/.sys/store_primary_index_stats` - )"); - - auto rows = ExecuteScanQuery(tableClient, selectQuery); - } - - { - auto selectQuery = TString(R"( - SELECT PathId, Kind, TabletId - FROM `/Root/olapStore/.sys/store_primary_index_stats` - WHERE - PathId == UInt64("3") AND Activity = true - GROUP BY TabletId, PathId, Kind - ORDER BY TabletId, Kind - )"); - - auto rows = ExecuteScanQuery(tableClient, selectQuery); - - UNIT_ASSERT_VALUES_EQUAL(rows.size(), 3); - UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows[0].at("PathId")), 3ull); - UNIT_ASSERT_VALUES_EQUAL(GetUtf8(rows[0].at("Kind")), "INSERTED"); - UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows[1].at("PathId")), 3ull); - UNIT_ASSERT_VALUES_EQUAL(GetUtf8(rows[2].at("Kind")), "INSERTED"); - UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows[2].at("PathId")), 3ull); - UNIT_ASSERT_VALUES_EQUAL(GetUtf8(rows[1].at("Kind")), "INSERTED"); - } - - { - auto selectQuery = TString(R"( - SELECT PathId, Kind, TabletId - FROM `/Root/olapStore/.sys/store_primary_index_stats` - GROUP BY PathId, Kind, TabletId - ORDER BY PathId DESC, Kind DESC, TabletId DESC - ; - )"); - - auto rows = ExecuteScanQuery(tableClient, selectQuery); - - ui32 numExpected = 3*3; - 
UNIT_ASSERT_VALUES_EQUAL(rows.size(), numExpected); - UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows[0].at("PathId")), 5ull); - UNIT_ASSERT_VALUES_EQUAL(GetUtf8(rows[0].at("Kind")), "INSERTED"); - UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows[numExpected-1].at("PathId")), 3ull); - UNIT_ASSERT_VALUES_EQUAL(GetUtf8(rows[numExpected-1].at("Kind")), "INSERTED"); - } - - { - auto selectQuery = TString(R"( - SELECT PathId, Kind, TabletId - FROM `/Root/olapStore/.sys/store_primary_index_stats` - WHERE - PathId > UInt64("0") AND PathId < UInt32("4") - OR PathId > UInt64("4") AND PathId <= UInt64("5") - GROUP BY PathId, Kind, TabletId - ORDER BY - PathId DESC, Kind DESC, TabletId DESC - ; - )"); - - auto rows = ExecuteScanQuery(tableClient, selectQuery); - - ui32 numExpected = 2*3; - UNIT_ASSERT_VALUES_EQUAL(rows.size(), numExpected); - UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows[0].at("PathId")), 5ull); - UNIT_ASSERT_VALUES_EQUAL(GetUtf8(rows[0].at("Kind")), "INSERTED"); - UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows[numExpected-1].at("PathId")), 3ull); - UNIT_ASSERT_VALUES_EQUAL(GetUtf8(rows[numExpected-1].at("Kind")), "INSERTED"); - } - } - - Y_UNIT_TEST(StatsSysViewFilter) { - auto settings = TKikimrSettings().SetWithSampleTables(false); - TKikimrRunner kikimr(settings); - auto csController = NYDBTest::TControllers::RegisterCSControllerGuard(); - - TLocalHelper(kikimr.GetTestServer()).CreateTestOlapTable(); - for (ui64 i = 0; i < 10; ++i) { - WriteTestData(kikimr, "/Root/olapStore/olapTable", 0, 1000000 + i*10000, 2000); - } - - auto tableClient = kikimr.GetTableClient(); - - { - auto selectQuery = TString(R"( - SELECT PathId, Kind, TabletId, Sum(BlobRangeSize) as Bytes - FROM `/Root/olapStore/.sys/store_primary_index_stats` - GROUP BY PathId, Kind, TabletId - ORDER BY PathId, Kind, TabletId; - )"); - - auto rows = ExecuteScanQuery(tableClient, selectQuery); - - UNIT_ASSERT_GE(rows.size(), 3); - } - - { - auto selectQuery = TString(R"( - SELECT PathId, Kind, TabletId, Sum(BlobRangeSize) as 
Bytes - FROM `/Root/olapStore/.sys/store_primary_index_stats` - GROUP BY PathId, Kind, TabletId - ORDER BY PathId, Kind, TabletId; - )"); - - auto rows = ExecuteScanQuery(tableClient, selectQuery); - - UNIT_ASSERT_GE(rows.size(), 3); - } - - { - auto selectQuery = TString(R"( - SELECT * - FROM `/Root/olapStore/.sys/store_primary_index_stats` - WHERE Kind == 'EVICTED' - ORDER BY PathId, Kind, TabletId; - )"); - - auto rows = ExecuteScanQuery(tableClient, selectQuery); - - UNIT_ASSERT_GE(rows.size(), 0); - } - - { - auto selectQuery = TString(R"( - SELECT PathId, Kind, TabletId - FROM `/Root/olapStore/.sys/store_primary_index_stats` - WHERE Kind IN ('SPLIT_COMPACTED', 'INACTIVE', 'EVICTED') - GROUP BY PathId, Kind, TabletId - ORDER BY PathId, Kind, TabletId; - )"); - - auto rows = ExecuteScanQuery(tableClient, selectQuery); - - UNIT_ASSERT_GE(rows.size(), 3); - } - } - - Y_UNIT_TEST(StatsSysViewAggregation) { - auto settings = TKikimrSettings().SetWithSampleTables(false); - TKikimrRunner kikimr(settings); - auto csController = NYDBTest::TControllers::RegisterCSControllerGuard(); - - TLocalHelper(kikimr.GetTestServer()).CreateTestOlapTable("olapTable_1"); - TLocalHelper(kikimr.GetTestServer()).CreateTestOlapTable("olapTable_2"); - TLocalHelper(kikimr.GetTestServer()).CreateTestOlapTable("olapTable_3"); - - for (ui64 i = 0; i < 100; ++i) { - WriteTestData(kikimr, "/Root/olapStore/olapTable_1", 0, 1000000 + i*10000, 1000); - WriteTestData(kikimr, "/Root/olapStore/olapTable_2", 0, 1000000 + i*10000, 2000); - WriteTestData(kikimr, "/Root/olapStore/olapTable_3", 0, 1000000 + i*10000, 3000); - } - - Tests::NCommon::TLoggerInit(kikimr).Initialize(); - - auto tableClient = kikimr.GetTableClient(); - - { - auto selectQuery = TString(R"( - SELECT - SUM(Rows) as rows, - FROM `/Root/olapStore/.sys/store_primary_index_stats` - WHERE - Kind != 'INACTIVE' - )"); - - auto rows = ExecuteScanQuery(tableClient, selectQuery); - UNIT_ASSERT_VALUES_EQUAL(rows.size(), 1ull); - } - - { - 
auto selectQuery = TString(R"( - SELECT - PathId, - SUM(Rows) as rows, - FROM `/Root/olapStore/.sys/store_primary_index_stats` - WHERE - Kind != 'INACTIVE' - GROUP BY - PathId - ORDER BY - PathId - )"); - - auto rows = ExecuteScanQuery(tableClient, selectQuery); - UNIT_ASSERT_VALUES_EQUAL(rows.size(), 3ull); - UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows[0].at("PathId")), 3); - UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows[1].at("PathId")), 4); - UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows[2].at("PathId")), 5); - } - - { - auto selectQuery = TString(R"( - SELECT - PathId, - SUM(Rows) as rows, - SUM(BlobRangeSize) as bytes, - SUM(RawBytes) as bytes_raw - FROM `/Root/olapStore/.sys/store_primary_index_stats` - WHERE - Kind IN ('INSERTED', 'SPLIT_COMPACTED', 'COMPACTED') - GROUP BY PathId - ORDER BY rows DESC - LIMIT 10 - )"); - - auto rows = ExecuteScanQuery(tableClient, selectQuery); - UNIT_ASSERT_VALUES_EQUAL(rows.size(), 3ull); - UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows[0].at("PathId")), 5); - UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows[1].at("PathId")), 4); - UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows[2].at("PathId")), 3); - } - - { - auto selectQuery = TString(R"( - SELECT - PathId, - SUM(Rows) as rows, - SUM(BlobRangeSize) as bytes, - SUM(RawBytes) as bytes_raw - FROM `/Root/olapStore/.sys/store_primary_index_stats` - WHERE - PathId == UInt64("3") AND Kind IN ('INSERTED', 'SPLIT_COMPACTED', 'COMPACTED') - GROUP BY PathId - ORDER BY rows DESC - LIMIT 10 - )"); - - auto rows = ExecuteScanQuery(tableClient, selectQuery); - UNIT_ASSERT_VALUES_EQUAL(rows.size(), 1ull); - UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows[0].at("PathId")), 3); - } - - { - auto selectQuery = TString(R"( - SELECT - PathId, - SUM(Rows) as rows, - SUM(BlobRangeSize) as bytes, - SUM(RawBytes) as bytes_raw - FROM `/Root/olapStore/.sys/store_primary_index_stats` - WHERE - PathId >= UInt64("4") AND Kind IN ('INSERTED', 'SPLIT_COMPACTED', 'COMPACTED') - GROUP BY PathId - ORDER BY rows DESC - LIMIT 10 - )"); - - auto rows = 
ExecuteScanQuery(tableClient, selectQuery); - UNIT_ASSERT_VALUES_EQUAL(rows.size(), 2ull); - UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows[0].at("PathId")), 5); - UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows[1].at("PathId")), 4); - } - - { - auto selectQuery = TString(R"( - SELECT PathId, TabletId, Kind - FROM `/Root/olapStore/.sys/store_primary_index_stats` - GROUP BY PathId, TabletId, Kind - )"); - - auto rows = ExecuteScanQuery(tableClient, selectQuery); - // 3 Tables with 3 Shards each and 2 KindId-s of stats - UNIT_ASSERT_VALUES_EQUAL(rows.size(), 3 * 3 * 2); - } - - { - auto selectQuery = TString(R"( - SELECT - count(distinct(PathId)), - count(distinct(Kind)), - count(distinct(TabletId)) - FROM `/Root/olapStore/.sys/store_primary_index_stats` - )"); - - auto rows = ExecuteScanQuery(tableClient, selectQuery); - UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows[0].at("column0")), 3ull); - UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows[0].at("column1")), 2); - UNIT_ASSERT_GE(GetUint64(rows[0].at("column2")), 3ull); - } - - { - auto selectQuery = TString(R"( - SELECT PathId, count(*), sum(Rows), sum(BlobRangeSize), sum(RawBytes) - FROM `/Root/olapStore/.sys/store_primary_index_stats` - GROUP BY PathId - ORDER BY PathId - )"); - - auto rows = ExecuteScanQuery(tableClient, selectQuery); - UNIT_ASSERT_VALUES_EQUAL(rows.size(), 3ull); - for (ui64 pathId = 3, row = 0; pathId <= 5; ++pathId, ++row) { - UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows[row].at("PathId")), pathId); - } - } - } - Y_UNIT_TEST(PredicatePushdownWithParameters) { constexpr bool logQueries = true; auto settings = TKikimrSettings() @@ -5083,195 +2219,6 @@ Y_UNIT_TEST_SUITE(KqpOlap) { UNIT_ASSERT_C(falsePositive.empty() && falseNegative.empty(), b); } - Y_UNIT_TEST(NoErrorOnLegacyPragma) { - TAggregationTestCase testCase; - testCase.SetQuery(R"( - PRAGMA Kikimr.KqpPushOlapProcess = "false"; - SELECT id, resource_id FROM `/Root/tableWithNulls` - WHERE - level = 5; - )") - .SetExpectedReply("[[5;#]]") - 
.AddExpectedPlanOptions("KqpOlapFilter"); - - TestTableWithNulls({ testCase }); - } - - Y_UNIT_TEST(BlocksRead) { - TAggregationTestCase testCase; - testCase.SetQuery(R"( - PRAGMA UseBlocks; - PRAGMA Kikimr.OptEnableOlapPushdown = "false"; - - SELECT - id, resource_id - FROM `/Root/tableWithNulls` - WHERE - level = 5; - )") - .SetExpectedReply("[[5;#]]"); - - TestTableWithNulls({ testCase }); - } - - Y_UNIT_TEST(Blocks_NoAggPushdown) { - TAggregationTestCase testCase; - testCase.SetQuery(R"( - PRAGMA UseBlocks; - SELECT - COUNT(DISTINCT id) - FROM `/Root/tableWithNulls`; - )") - .SetExpectedReply("[[10u]]"); - - TestTableWithNulls({ testCase }); - } - - Y_UNIT_TEST(Json_GetValue) { - TAggregationTestCase testCase; - testCase.SetQuery(R"( - SELECT id, JSON_VALUE(jsonval, "$.col1"), JSON_VALUE(jsondoc, "$.col1") FROM `/Root/tableWithNulls` - WHERE JSON_VALUE(jsonval, "$.col1") = "val1" AND id = 1; - )") -#if SSA_RUNTIME_VERSION >= 3U - .AddExpectedPlanOptions("KqpOlapJsonValue") -#else - .AddExpectedPlanOptions("Udf") -#endif - .SetExpectedReply(R"([[1;["val1"];#]])"); - - TestTableWithNulls({ testCase }); - } - - Y_UNIT_TEST(Json_GetValue_ToString) { - TAggregationTestCase testCase; - testCase.SetQuery(R"( - SELECT id, JSON_VALUE(jsonval, "$.col1" RETURNING String), JSON_VALUE(jsondoc, "$.col1") FROM `/Root/tableWithNulls` - WHERE JSON_VALUE(jsonval, "$.col1" RETURNING String) = "val1" AND id = 1; - )") -#if SSA_RUNTIME_VERSION >= 3U - .AddExpectedPlanOptions("KqpOlapJsonValue") -#else - .AddExpectedPlanOptions("Udf") -#endif - .SetExpectedReply(R"([[1;["val1"];#]])"); - - TestTableWithNulls({ testCase }); - } - - Y_UNIT_TEST(Json_GetValue_ToInt) { - TAggregationTestCase testCase; - testCase.SetQuery(R"( - SELECT id, JSON_VALUE(jsonval, "$.obj.obj_col2_int" RETURNING Int), JSON_VALUE(jsondoc, "$.obj.obj_col2_int" RETURNING Int) FROM `/Root/tableWithNulls` - WHERE JSON_VALUE(jsonval, "$.obj.obj_col2_int" RETURNING Int) = 16 AND id = 1; - )") -#if SSA_RUNTIME_VERSION 
>= 3U - .AddExpectedPlanOptions("KqpOlapJsonValue") -#else - .AddExpectedPlanOptions("Udf") -#endif - .SetExpectedReply(R"([[1;[16];#]])"); - - TestTableWithNulls({ testCase }); - } - - Y_UNIT_TEST(JsonDoc_GetValue) { - TAggregationTestCase testCase; - testCase.SetQuery(R"( - SELECT id, JSON_VALUE(jsonval, "$.col1"), JSON_VALUE(jsondoc, "$.col1") FROM `/Root/tableWithNulls` - WHERE JSON_VALUE(jsondoc, "$.col1") = "val1" AND id = 6; - )") -#if SSA_RUNTIME_VERSION >= 3U - .AddExpectedPlanOptions("KqpOlapJsonValue") -#else - .AddExpectedPlanOptions("Udf") -#endif - .SetExpectedReply(R"([[6;#;["val1"]]])"); - - TestTableWithNulls({ testCase }); - } - - Y_UNIT_TEST(JsonDoc_GetValue_ToString) { - TAggregationTestCase testCase; - testCase.SetQuery(R"( - SELECT id, JSON_VALUE(jsonval, "$.col1"), JSON_VALUE(jsondoc, "$.col1" RETURNING String) FROM `/Root/tableWithNulls` - WHERE JSON_VALUE(jsondoc, "$.col1" RETURNING String) = "val1" AND id = 6; - )") -#if SSA_RUNTIME_VERSION >= 3U - .AddExpectedPlanOptions("KqpOlapJsonValue") -#else - .AddExpectedPlanOptions("Udf") -#endif - .SetExpectedReply(R"([[6;#;["val1"]]])"); - - TestTableWithNulls({ testCase }); - } - - Y_UNIT_TEST(JsonDoc_GetValue_ToInt) { - TAggregationTestCase testCase; - testCase.SetQuery(R"( - SELECT id, JSON_VALUE(jsonval, "$.obj.obj_col2_int"), JSON_VALUE(jsondoc, "$.obj.obj_col2_int" RETURNING Int) FROM `/Root/tableWithNulls` - WHERE JSON_VALUE(jsondoc, "$.obj.obj_col2_int" RETURNING Int) = 16 AND id = 6; - )") -#if SSA_RUNTIME_VERSION >= 3U - .AddExpectedPlanOptions("KqpOlapJsonValue") -#else - .AddExpectedPlanOptions("Udf") -#endif - .SetExpectedReply(R"([[6;#;[16]]])"); - - TestTableWithNulls({ testCase }); - } - - Y_UNIT_TEST(Json_Exists) { - TAggregationTestCase testCase; - testCase.SetQuery(R"( - SELECT id, JSON_EXISTS(jsonval, "$.col1"), JSON_EXISTS(jsondoc, "$.col1") FROM `/Root/tableWithNulls` - WHERE - JSON_EXISTS(jsonval, "$.col1") AND level = 1; - )") -#if SSA_RUNTIME_VERSION >= 3U - 
.AddExpectedPlanOptions("KqpOlapJsonExists") -#else - .AddExpectedPlanOptions("Udf") -#endif - .SetExpectedReply(R"([[1;[%true];#]])"); - - TestTableWithNulls({ testCase }); - } - - Y_UNIT_TEST(JsonDoc_Exists) { - TAggregationTestCase testCase; - testCase.SetQuery(R"( - SELECT id, JSON_EXISTS(jsonval, "$.col1"), JSON_EXISTS(jsondoc, "$.col1") FROM `/Root/tableWithNulls` - WHERE - JSON_EXISTS(jsondoc, "$.col1") AND id = 6; - )") -#if SSA_RUNTIME_VERSION >= 3U - .AddExpectedPlanOptions("KqpOlapJsonExists") -#else - .AddExpectedPlanOptions("Udf") -#endif - .SetExpectedReply(R"([[6;#;[%true]]])"); - - TestTableWithNulls({ testCase }); - } - - Y_UNIT_TEST(Json_Query) { - TAggregationTestCase testCase; - testCase.SetQuery(R"( - SELECT id, JSON_QUERY(jsonval, "$.col1" WITH UNCONDITIONAL WRAPPER), - JSON_QUERY(jsondoc, "$.col1" WITH UNCONDITIONAL WRAPPER) - FROM `/Root/tableWithNulls` - WHERE - level = 1; - )") - .AddExpectedPlanOptions("Udf") - .SetExpectedReply(R"([[1;["[\"val1\"]"];#]])"); - - TestTableWithNulls({ testCase }); - } - Y_UNIT_TEST(Olap_InsertFailsOnDataQuery) { auto settings = TKikimrSettings() .SetWithSampleTables(false) @@ -5409,7 +2356,7 @@ Y_UNIT_TEST_SUITE(KqpOlap) { UNIT_ASSERT_C(result.IsSuccess(), result.GetIssues().ToString()); TString output = FormatResultSetYson(result.GetResultSet(0)); Cout << output << Endl; - CompareYson(output, R"([[1000001u;["1"];["uid_1000001"];[1]]])"); + CompareYson(output, R"([[1000001u;["1"];"uid_1000001";[1]]])"); } Y_UNIT_TEST(OlapRead_GenericQuery) { @@ -5514,7 +2461,7 @@ Y_UNIT_TEST_SUITE(KqpOlap) { tableInserter.AddRow().Add(2).Add("test_res_2").Add("val2").AddNull(); testHelper.BulkUpsert(testTable, tableInserter); } - while (csController->GetIndexations().Val() == 0) { + while (csController->GetInsertFinishedCounter().Val() == 0) { Cout << "Wait indexation..." 
<< Endl; Sleep(TDuration::Seconds(2)); } @@ -5540,7 +2487,7 @@ Y_UNIT_TEST_SUITE(KqpOlap) { tableInserter.AddRow().Add(1).Add(10); testHelper.BulkUpsert(testTable, tableInserter); } - while (csController->GetIndexations().Val() < 1) { + while (csController->GetInsertFinishedCounter().Val() < 1) { Cout << "Wait indexation..." << Endl; Sleep(TDuration::Seconds(2)); } @@ -5551,7 +2498,7 @@ Y_UNIT_TEST_SUITE(KqpOlap) { testHelper.BulkUpsert(testTable, tableInserter); } testHelper.ReadData("SELECT value FROM `/Root/ColumnTableTest` WHERE id = 1", "[[110]]"); - while (csController->GetIndexations().Val() < 2) { + while (csController->GetInsertFinishedCounter().Val() < 2) { Cout << "Wait indexation..." << Endl; Sleep(TDuration::Seconds(2)); } @@ -6071,53 +3018,6 @@ Y_UNIT_TEST_SUITE(KqpOlap) { } } - Y_UNIT_TEST(BlockGenericWithDistinct) { - TAggregationTestCase testCase; - testCase.SetQuery(R"( - SELECT - COUNT(DISTINCT id) - FROM `/Root/tableWithNulls` - WHERE level = 5 AND Cast(id AS String) = "5"; - )") - .AddExpectedPlanOptions("KqpBlockReadOlapTableRanges") - .AddExpectedPlanOptions("WideFromBlocks") - .SetExpectedReply("[[1u]]"); - TestTableWithNulls({ testCase }, /* generic */ true); - } - - Y_UNIT_TEST(BlockGenericSimpleAggregation) { - TAggregationTestCase testCase; - testCase.SetQuery(R"( - SELECT - level, COUNT(*), SUM(id) - FROM `/Root/tableWithNulls` - WHERE level = 5 - GROUP BY level - ORDER BY level; - )") - .AddExpectedPlanOptions("KqpBlockReadOlapTableRanges") - .AddExpectedPlanOptions("WideFromBlocks") - .SetExpectedReply(R"([[[5];1u;5]])"); - - TestTableWithNulls({ testCase }, /* generic */ true); - } - - Y_UNIT_TEST(BlockGenericSelectAll) { - TAggregationTestCase testCase; - testCase.SetQuery(R"( - SELECT - id, resource_id, level - FROM `/Root/tableWithNulls` - WHERE level != 5 OR level IS NULL - ORDER BY id, resource_id, level; - )") - .AddExpectedPlanOptions("KqpBlockReadOlapTableRanges") - .AddExpectedPlanOptions("WideFromBlocks") - 
.SetExpectedReply(R"([[1;#;[1]];[2;#;[2]];[3;#;[3]];[4;#;[4]];[6;["6"];#];[7;["7"];#];[8;["8"];#];[9;["9"];#];[10;["10"];#]])"); - - TestTableWithNulls({ testCase }, /* generic */ true); - } } -} // namespace NKqp -} // namespace NKikimr +} diff --git a/ydb/core/kqp/ut/olap/statistics_ut.cpp b/ydb/core/kqp/ut/olap/statistics_ut.cpp new file mode 100644 index 000000000000..d79a07f9bc3b --- /dev/null +++ b/ydb/core/kqp/ut/olap/statistics_ut.cpp @@ -0,0 +1,74 @@ +#include "helpers/typed_local.h" +#include + +namespace NKikimr::NKqp { + +Y_UNIT_TEST_SUITE(KqpOlapStatistics) { + Y_UNIT_TEST(StatsUsage) { + auto csController = NYDBTest::TControllers::RegisterCSControllerGuard(); + { + auto settings = TKikimrSettings().SetWithSampleTables(false); + TKikimrRunner kikimr(settings); + Tests::NCommon::TLoggerInit(kikimr).Initialize(); + TTypedLocalHelper helper("Utf8", kikimr); + helper.CreateTestOlapTable(); + auto tableClient = kikimr.GetTableClient(); + { + auto alterQuery = TStringBuilder() << "ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_STAT, TYPE=max, NAME=max_pk_int, FEATURES=`{\"column_name\": \"pk_int\"}`);"; + auto session = tableClient.CreateSession().GetValueSync().GetSession(); + auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(alterResult.GetStatus(), NYdb::EStatus::SUCCESS, alterResult.GetIssues().ToString()); + } + { + auto alterQuery = TStringBuilder() << "ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_STAT, TYPE=max, NAME=max_field, FEATURES=`{\"column_name\": \"field\"}`);"; + auto session = tableClient.CreateSession().GetValueSync().GetSession(); + auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); + UNIT_ASSERT_VALUES_UNEQUAL_C(alterResult.GetStatus(), NYdb::EStatus::SUCCESS, alterResult.GetIssues().ToString()); + } + { + auto alterQuery = TStringBuilder() << "ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_STAT, 
TYPE=max, NAME=max_pk_int, FEATURES=`{\"column_name\": \"pk_int\"}`);"; + auto session = tableClient.CreateSession().GetValueSync().GetSession(); + auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); + UNIT_ASSERT_VALUES_UNEQUAL_C(alterResult.GetStatus(), NYdb::EStatus::SUCCESS, alterResult.GetIssues().ToString()); + } + { + auto alterQuery = TStringBuilder() << "ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=DROP_STAT, NAME=max_pk_int);"; + auto session = tableClient.CreateSession().GetValueSync().GetSession(); + auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(alterResult.GetStatus(), NYdb::EStatus::SUCCESS, alterResult.GetIssues().ToString()); + } + } + } + + Y_UNIT_TEST(StatsUsageWithTTL) { + auto csController = NYDBTest::TControllers::RegisterCSControllerGuard(); + { + auto settings = TKikimrSettings().SetWithSampleTables(false); + TKikimrRunner kikimr(settings); + Tests::NCommon::TLoggerInit(kikimr).Initialize(); + TTypedLocalHelper helper("Utf8", kikimr); + helper.CreateTestOlapTable(); + auto tableClient = kikimr.GetTableClient(); + { + auto alterQuery = TStringBuilder() << "ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_STAT, TYPE=max, NAME=max_ts, FEATURES=`{\"column_name\": \"ts\"}`);"; + auto session = tableClient.CreateSession().GetValueSync().GetSession(); + auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(alterResult.GetStatus(), NYdb::EStatus::SUCCESS, alterResult.GetIssues().ToString()); + } + { + auto alterQuery = TStringBuilder() << "ALTER TABLE `/Root/olapStore/olapTable` SET (TTL = Interval(\"P1D\") ON ts);"; + auto session = tableClient.CreateSession().GetValueSync().GetSession(); + auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); + UNIT_ASSERT_VALUES_EQUAL_C(alterResult.GetStatus(), NYdb::EStatus::SUCCESS, alterResult.GetIssues().ToString()); + } + { + 
auto alterQuery = TStringBuilder() << "ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=DROP_STAT, NAME=max_ts);"; + auto session = tableClient.CreateSession().GetValueSync().GetSession(); + auto alterResult = session.ExecuteSchemeQuery(alterQuery).GetValueSync(); + UNIT_ASSERT_VALUES_UNEQUAL_C(alterResult.GetStatus(), NYdb::EStatus::SUCCESS, alterResult.GetIssues().ToString()); + } + } + } +} + +} diff --git a/ydb/core/kqp/ut/olap/sys_view_ut.cpp b/ydb/core/kqp/ut/olap/sys_view_ut.cpp new file mode 100644 index 000000000000..866563f088f6 --- /dev/null +++ b/ydb/core/kqp/ut/olap/sys_view_ut.cpp @@ -0,0 +1,722 @@ +#include "helpers/local.h" +#include "helpers/query_executor.h" +#include "helpers/typed_local.h" +#include "helpers/writer.h" +#include "helpers/get_value.h" + +#include +#include + +namespace NKikimr::NKqp { + +Y_UNIT_TEST_SUITE(KqpOlapSysView) { + Y_UNIT_TEST(StatsSysView) { + auto settings = TKikimrSettings() + .SetWithSampleTables(false); + TKikimrRunner kikimr(settings); + static ui32 numKinds = 2; + + auto csController = NYDBTest::TControllers::RegisterCSControllerGuard(); + TLocalHelper(kikimr).CreateTestOlapTable(); + for (ui64 i = 0; i < 100; ++i) { + WriteTestData(kikimr, "/Root/olapStore/olapTable", 0, 1000000 + i * 10000, 1000); + } + + auto tableClient = kikimr.GetTableClient(); + auto selectQuery = TString(R"( + SELECT PathId, Kind, TabletId, Sum(Rows) as Rows + FROM `/Root/olapStore/.sys/store_primary_index_stats` + GROUP BY PathId, Kind, TabletId + ORDER BY TabletId, Kind, PathId + )"); + + auto rows = ExecuteScanQuery(tableClient, selectQuery); + + UNIT_ASSERT_VALUES_EQUAL(rows.size(), numKinds*3); + UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows[0].at("PathId")), 3ull); + UNIT_ASSERT_VALUES_EQUAL(GetUtf8(rows[0].at("Kind")), "INSERTED"); + UNIT_ASSERT_GE(GetUint64(rows[0].at("TabletId")), 72075186224037888ull); + UNIT_ASSERT_GE(GetUint64(rows[2].at("TabletId")), 72075186224037889ull); + 
UNIT_ASSERT_GE(GetUint64(rows[4].at("TabletId")), 72075186224037890ull); + UNIT_ASSERT_GE(GetUint64(rows[1].at("TabletId")), GetUint64(rows[0].at("TabletId"))); + UNIT_ASSERT_VALUES_EQUAL(GetUtf8(rows[2].at("Kind")), "INSERTED"); + UNIT_ASSERT_GE(GetUint64(rows[2].at("TabletId")), GetUint64(rows[1].at("TabletId"))); + UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows[3].at("PathId")), 3ull); + UNIT_ASSERT_VALUES_EQUAL(GetUtf8(rows[3].at("Kind")), "SPLIT_COMPACTED"); + UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows[3].at("TabletId")), GetUint64(rows[2].at("TabletId"))); + UNIT_ASSERT_VALUES_EQUAL(GetUtf8(rows[4].at("Kind")), "INSERTED"); + UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows[4].at("TabletId")), GetUint64(rows[5].at("TabletId"))); + UNIT_ASSERT_GE( + GetUint64(rows[0].at("Rows")) + GetUint64(rows[1].at("Rows")) + GetUint64(rows[2].at("Rows")) + + GetUint64(rows[3].at("Rows")) + GetUint64(rows[4].at("Rows")) + GetUint64(rows[5].at("Rows")), + 0.3*0.9*100*1000); // >= 90% of 100K inserted rows + } + + Y_UNIT_TEST(StatsSysViewTable) { + auto settings = TKikimrSettings() + .SetWithSampleTables(false); + TKikimrRunner kikimr(settings); + auto csController = NYDBTest::TControllers::RegisterCSControllerGuard(); + static ui32 numKinds = 5; + + TLocalHelper(kikimr).CreateTestOlapTable("olapTable_1"); + TLocalHelper(kikimr).CreateTestOlapTable("olapTable_2"); + for (ui64 i = 0; i < 10; ++i) { + WriteTestData(kikimr, "/Root/olapStore/olapTable_1", 0, 1000000 + i*10000, 1000); + WriteTestData(kikimr, "/Root/olapStore/olapTable_2", 0, 1000000 + i*10000, 2000); + } + + auto tableClient = kikimr.GetTableClient(); + { + auto selectQuery = TString(R"( + SELECT PathId, Kind, TabletId + FROM `/Root/olapStore/olapTable_1/.sys/primary_index_stats` + GROUP BY PathId, TabletId, Kind + ORDER BY PathId, TabletId, Kind + )"); + + auto rows = ExecuteScanQuery(tableClient, selectQuery); + + UNIT_ASSERT_GT(rows.size(), 1*numKinds); + UNIT_ASSERT_LE(rows.size(), 3*numKinds); + 
UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows.front().at("PathId")), 3ull); + UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows.back().at("PathId")), 3ull); + } + { + auto selectQuery = TString(R"( + SELECT PathId, Kind, TabletId + FROM `/Root/olapStore/olapTable_2/.sys/primary_index_stats` + GROUP BY PathId, TabletId, Kind + ORDER BY PathId, TabletId, Kind + )"); + + auto rows = ExecuteScanQuery(tableClient, selectQuery); + + UNIT_ASSERT_GT(rows.size(), 1*numKinds); + UNIT_ASSERT_LE(rows.size(), 3*numKinds); + UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows.front().at("PathId")), 4ull); + UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows.back().at("PathId")), 4ull); + } + { + auto selectQuery = TString(R"( + SELECT * + FROM `/Root/olapStore/olapTable_1/.sys/primary_index_stats` + WHERE + PathId > UInt64("3") + ORDER BY PathId, Kind, TabletId + )"); + + auto rows = ExecuteScanQuery(tableClient, selectQuery); + + UNIT_ASSERT_VALUES_EQUAL(rows.size(), 0); + } + } + + Y_UNIT_TEST(StatsSysViewEnumStringBytes) { + ui64 rawBytesPK1; + ui64 bytesPK1; + { + auto csController = NYDBTest::TControllers::RegisterCSControllerGuard(); + csController->SetPeriodicWakeupActivationPeriod(TDuration::Seconds(1)); + auto settings = TKikimrSettings() + .SetWithSampleTables(false); + TKikimrRunner kikimr(settings); + Tests::NCommon::TLoggerInit(kikimr).Initialize(); + TTypedLocalHelper helper("", kikimr, "olapTable", "olapStore12"); + helper.CreateTestOlapTable(); + helper.FillPKOnly(0, 800000); + helper.GetVolumes(rawBytesPK1, bytesPK1, false); + } + + auto csController = NYDBTest::TControllers::RegisterCSControllerGuard(); + ui64 rawBytesUnpack1PK = 0; + ui64 bytesUnpack1PK = 0; + ui64 rawBytesPackAndUnpack2PK; + ui64 bytesPackAndUnpack2PK; + const ui32 rowsCount = 800000; + const ui32 groupsCount = 512; + { + auto settings = TKikimrSettings() + .SetWithSampleTables(false); + TKikimrRunner kikimr(settings); + Tests::NCommon::TLoggerInit(kikimr).Initialize(); + TTypedLocalHelper helper("Utf8", kikimr); + 
helper.CreateTestOlapTable(); + NArrow::NConstruction::TStringPoolFiller sPool(groupsCount, 52); + helper.FillTable(sPool, 0, rowsCount); + helper.PrintCount(); + { + auto d = helper.GetDistribution(); + Y_ABORT_UNLESS(d.GetCount() == rowsCount); + Y_ABORT_UNLESS(d.GetGroupsCount() == groupsCount); + Y_ABORT_UNLESS(d.GetMaxCount() - d.GetMinCount() <= 1); + } + helper.GetVolumes(rawBytesUnpack1PK, bytesUnpack1PK, false); + Sleep(TDuration::Seconds(5)); + auto tableClient = kikimr.GetTableClient(); + helper.ExecuteSchemeQuery(TStringBuilder() << "ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=ALTER_COLUMN, NAME=field, `ENCODING.DICTIONARY.ENABLED`=`true`);"); + helper.ExecuteSchemeQuery("ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=ALTER_COLUMN, NAME=field1, `ENCODING.DICTIONARY.ENABLED`=`true`);", NYdb::EStatus::SCHEME_ERROR); + helper.ExecuteSchemeQuery("ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=ALTER_COLUMN, NAME=field, `ENCODING.DICTIONARY.ENABLED1`=`true`);", NYdb::EStatus::GENERIC_ERROR); + Sleep(TDuration::Seconds(5)); + helper.FillTable(sPool, 1, rowsCount); + Sleep(TDuration::Seconds(5)); + { + helper.GetVolumes(rawBytesPackAndUnpack2PK, bytesPackAndUnpack2PK, false); + helper.PrintCount(); + { + auto d = helper.GetDistribution(); + Cerr << d.DebugString() << Endl; + Y_ABORT_UNLESS(d.GetCount() == 2 * rowsCount); + Y_ABORT_UNLESS(d.GetGroupsCount() == groupsCount); + Y_ABORT_UNLESS(d.GetMaxCount() - d.GetMinCount() <= 2); + } + } + helper.ExecuteSchemeQuery("ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=ALTER_COLUMN, NAME=field, `SERIALIZER.CLASS_NAME`=`ARROW_SERIALIZER`, `COMPRESSION.TYPE`=`zstd`);"); + } + const ui64 rawBytesUnpack = rawBytesUnpack1PK - rawBytesPK1; + const ui64 bytesUnpack = bytesUnpack1PK - bytesPK1; + const ui64 rawBytesPack = rawBytesPackAndUnpack2PK - rawBytesUnpack1PK - rawBytesPK1; + const ui64 bytesPack = bytesPackAndUnpack2PK - bytesUnpack1PK - bytesPK1; + 
TStringBuilder result; + result << "unpacked data: " << rawBytesUnpack << " / " << bytesUnpack << Endl; + result << "packed data: " << rawBytesPack << " / " << bytesPack << Endl; + result << "frq_diff: " << 1.0 * bytesPack / bytesUnpack << Endl; + result << "frq_compression: " << 1.0 * bytesPack / rawBytesPack << Endl; + result << "pk_size : " << rawBytesPK1 << " / " << bytesPK1 << Endl; + Cerr << result << Endl; + Y_ABORT_UNLESS(bytesPack / bytesUnpack < 0.1); + } + + Y_UNIT_TEST(StatsSysViewBytesPackActualization) { + ui64 rawBytesPK1; + ui64 bytesPK1; + auto csController = NYDBTest::TControllers::RegisterCSControllerGuard(); + csController->SetPeriodicWakeupActivationPeriod(TDuration::Seconds(1)); + auto settings = TKikimrSettings() + .SetWithSampleTables(false); + TKikimrRunner kikimr(settings); + Tests::NCommon::TLoggerInit(kikimr).Initialize(); + TTypedLocalHelper helper("", kikimr, "olapTable", "olapStore"); + helper.CreateTestOlapTable(); + helper.FillPKOnly(0, 800000); + helper.GetVolumes(rawBytesPK1, bytesPK1, false, {"pk_int"}); + auto tableClient = kikimr.GetTableClient(); + { + helper.ExecuteSchemeQuery("ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=ALTER_COLUMN, NAME=pk_int, `SERIALIZER.CLASS_NAME`=`ARROW_SERIALIZER`, `COMPRESSION.TYPE`=`zstd`);"); + helper.ExecuteSchemeQuery("ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_OPTIONS, SCHEME_NEED_ACTUALIZATION=`true`);"); + csController->WaitActualization(TDuration::Seconds(10)); + ui64 rawBytesPK2; + ui64 bytesPK2; + helper.GetVolumes(rawBytesPK2, bytesPK2, false, {"pk_int"}); + AFL_VERIFY(rawBytesPK2 == rawBytesPK1)("pk1", rawBytesPK1)("pk2", rawBytesPK2); + AFL_VERIFY(bytesPK2 < bytesPK1 / 3)("pk1", bytesPK1)("pk2", bytesPK2); + } + { + helper.ExecuteSchemeQuery("ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=ALTER_COLUMN, NAME=pk_int, `SERIALIZER.CLASS_NAME`=`ARROW_SERIALIZER`, `COMPRESSION.TYPE`=`lz4`);"); + helper.ExecuteSchemeQuery("ALTER OBJECT 
`/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_OPTIONS, SCHEME_NEED_ACTUALIZATION=`true`);"); + csController->WaitActualization(TDuration::Seconds(10)); + ui64 rawBytesPK2; + ui64 bytesPK2; + helper.GetVolumes(rawBytesPK2, bytesPK2, false, {"pk_int"}); + AFL_VERIFY(rawBytesPK2 == rawBytesPK1)("pk1", rawBytesPK1)("pk2", rawBytesPK2); + AFL_VERIFY(bytesPK2 < bytesPK1 * 1.01 && bytesPK1 < bytesPK2 * 1.01)("pk1", bytesPK1)("pk2", bytesPK2); + } + } + + Y_UNIT_TEST(StatsSysViewBytesColumnActualization) { + ui64 rawBytes1; + ui64 bytes1; + auto csController = NYDBTest::TControllers::RegisterCSControllerGuard(); + csController->SetPeriodicWakeupActivationPeriod(TDuration::Seconds(1)); + auto settings = TKikimrSettings() + .SetWithSampleTables(false); + TKikimrRunner kikimr(settings); + Tests::NCommon::TLoggerInit(kikimr).Initialize(); + TTypedLocalHelper helper("Utf8", kikimr); + helper.CreateTestOlapTable(); + NArrow::NConstruction::TStringPoolFiller sPool(3, 52); + helper.FillTable(sPool, 0, 800000); + helper.GetVolumes(rawBytes1, bytes1, false, {"new_column_ui64"}); + AFL_VERIFY(rawBytes1 == 0); + AFL_VERIFY(bytes1 == 0); + auto tableClient = kikimr.GetTableClient(); + { + helper.ExecuteSchemeQuery("ALTER TABLESTORE `/Root/olapStore` ADD COLUMN new_column_ui64 Uint64;"); + helper.ExecuteSchemeQuery("ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_OPTIONS, SCHEME_NEED_ACTUALIZATION=`true`);"); + csController->WaitActualization(TDuration::Seconds(10)); + ui64 rawBytes2; + ui64 bytes2; + helper.GetVolumes(rawBytes2, bytes2, false, {"new_column_ui64"}); + AFL_VERIFY(rawBytes2 == 6500041)("real", rawBytes2); + AFL_VERIFY(bytes2 == 45360)("b", bytes2); + } + } + + Y_UNIT_TEST(StatsSysViewBytesDictActualization) { + ui64 rawBytes1; + ui64 bytes1; + auto csController = NYDBTest::TControllers::RegisterCSControllerGuard(); + csController->SetPeriodicWakeupActivationPeriod(TDuration::Seconds(1)); + auto settings = TKikimrSettings() + 
.SetWithSampleTables(false); + TKikimrRunner kikimr(settings); + Tests::NCommon::TLoggerInit(kikimr).Initialize(); + TTypedLocalHelper helper("Utf8", kikimr); + helper.CreateTestOlapTable(); + NArrow::NConstruction::TStringPoolFiller sPool(3, 52); + helper.FillTable(sPool, 0, 800000); + helper.GetVolumes(rawBytes1, bytes1, false, {"field"}); + auto tableClient = kikimr.GetTableClient(); + { + helper.ExecuteSchemeQuery("ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=ALTER_COLUMN, NAME=field, `ENCODING.DICTIONARY.ENABLED`=`true`);"); + helper.ExecuteSchemeQuery("ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_OPTIONS, SCHEME_NEED_ACTUALIZATION=`true`);"); + csController->WaitActualization(TDuration::Seconds(10)); + ui64 rawBytes2; + ui64 bytes2; + helper.GetVolumes(rawBytes2, bytes2, false, {"field"}); + AFL_VERIFY(rawBytes2 == rawBytes1)("f1", rawBytes1)("f2", rawBytes2); + AFL_VERIFY(bytes2 < bytes1 * 0.5)("f1", bytes1)("f2", bytes2); + } + { + helper.ExecuteSchemeQuery("ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=ALTER_COLUMN, NAME=field, `ENCODING.DICTIONARY.ENABLED`=`false`);"); + helper.ExecuteSchemeQuery("ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_OPTIONS, SCHEME_NEED_ACTUALIZATION=`true`);"); + csController->WaitActualization(TDuration::Seconds(10)); + ui64 rawBytes2; + ui64 bytes2; + helper.GetVolumes(rawBytes2, bytes2, false, {"field"}); + AFL_VERIFY(rawBytes2 == rawBytes1)("f1", rawBytes1)("f2", rawBytes2); + AFL_VERIFY(bytes2 < bytes1 * 1.01 && bytes1 < bytes2 * 1.01)("f1", bytes1)("f2", bytes2); + } + } + + Y_UNIT_TEST(StatsSysViewBytesDictStatActualization) { + ui64 rawBytes1; + ui64 bytes1; + auto csController = NYDBTest::TControllers::RegisterCSControllerGuard(); + csController->SetPeriodicWakeupActivationPeriod(TDuration::Seconds(1)); + auto settings = TKikimrSettings().SetWithSampleTables(false); + TKikimrRunner kikimr(settings); + 
Tests::NCommon::TLoggerInit(kikimr).Initialize(); + TTypedLocalHelper helper("Utf8", kikimr); + helper.CreateTestOlapTable(); + NArrow::NConstruction::TStringPoolFiller sPool(3, 52); + helper.FillTable(sPool, 0, 800000); + helper.GetVolumes(rawBytes1, bytes1, false, {"field"}); + auto tableClient = kikimr.GetTableClient(); + { + helper.ExecuteSchemeQuery("ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=ALTER_COLUMN, NAME=field, `ENCODING.DICTIONARY.ENABLED`=`true`);"); + helper.ExecuteSchemeQuery("ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_STAT, NAME=field_var, TYPE=variability, FEATURES=`{\"column_name\" : \"field\"}`);"); + helper.ExecuteSchemeQuery("ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_STAT, NAME=pk_int_max, TYPE=max, FEATURES=`{\"column_name\" : \"pk_int\"}`);"); + helper.ExecuteSchemeQuery("ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_OPTIONS, SCHEME_NEED_ACTUALIZATION=`true`);"); + csController->WaitActualization(TDuration::Seconds(10)); + ui64 rawBytes2; + ui64 bytes2; + helper.GetVolumes(rawBytes2, bytes2, false, {"field"}); + AFL_VERIFY(rawBytes2 == rawBytes1)("f1", rawBytes1)("f2", rawBytes2); + AFL_VERIFY(bytes2 < bytes1 * 0.5)("f1", bytes1)("f2", bytes2); + std::vector stats; + helper.GetStats(stats, true); + for (auto&& i : stats) { + AFL_VERIFY(i.ScalarsSize() == 2); + AFL_VERIFY(i.GetScalars()[0].GetUint32() == 3); + } + } + { + helper.ExecuteSchemeQuery("ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=DROP_STAT, NAME=pk_int_max);"); + helper.ExecuteSchemeQuery("ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_OPTIONS, SCHEME_NEED_ACTUALIZATION=`true`);"); + csController->WaitActualization(TDuration::Seconds(10)); + std::vector stats; + helper.GetStats(stats, true); + for (auto&& i : stats) { + AFL_VERIFY(i.ScalarsSize() == 1); + AFL_VERIFY(i.GetScalars()[0].GetUint32() == 3); + } + } + { + helper.ExecuteSchemeQuery("ALTER 
OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_STAT, NAME=pk_int_max, TYPE=max, FEATURES=`{\"column_name\" : \"pk_int\"}`);"); + helper.ExecuteSchemeQuery("ALTER OBJECT `/Root/olapStore` (TYPE TABLESTORE) SET (ACTION=UPSERT_OPTIONS, SCHEME_NEED_ACTUALIZATION=`true`);"); + csController->WaitActualization(TDuration::Seconds(10)); + std::vector stats; + helper.GetStats(stats, true); + for (auto&& i : stats) { + AFL_VERIFY(i.ScalarsSize() == 2); + AFL_VERIFY(i.GetScalars()[0].GetUint32() == 3); + } + } + } + + Y_UNIT_TEST(StatsSysViewColumns) { + auto settings = TKikimrSettings().SetWithSampleTables(false); + auto csController = NYDBTest::TControllers::RegisterCSControllerGuard(); + TKikimrRunner kikimr(settings); + + TLocalHelper(kikimr.GetTestServer()).CreateTestOlapTable(); + for (ui64 i = 0; i < 10; ++i) { + WriteTestData(kikimr, "/Root/olapStore/olapTable", 0, 1000000 + i*10000, 2000); + } + + auto tableClient = kikimr.GetTableClient(); + + { + auto selectQuery = TString(R"( + SELECT TabletId, PathId, Kind + FROM `/Root/olapStore/.sys/store_primary_index_stats` + ORDER BY PathId, Kind, TabletId + LIMIT 4; + )"); + + auto rows = ExecuteScanQuery(tableClient, selectQuery); + + UNIT_ASSERT_VALUES_EQUAL(rows.size(), 4); + UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows[0].at("PathId")), 3ull); + UNIT_ASSERT_VALUES_EQUAL(GetUtf8(rows[0].at("Kind")), "INSERTED"); + UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows[3].at("PathId")), 3ull); + UNIT_ASSERT_VALUES_EQUAL(GetUtf8(rows[3].at("Kind")), "INSERTED"); + } + { + auto selectQuery = TString(R"( + SELECT SUM(BlobRangeSize) as Bytes, SUM(Rows) as Rows, PathId, TabletId + FROM `/Root/olapStore/.sys/store_primary_index_stats` + GROUP BY PathId, TabletId + ORDER BY Bytes + )"); + + auto rows = ExecuteScanQuery(tableClient, selectQuery); + + UNIT_ASSERT_VALUES_EQUAL(rows.size(), 3); + UNIT_ASSERT_LE(GetUint64(rows[0].at("Bytes")), GetUint64(rows[1].at("Bytes"))); + } + { + auto selectQuery = TString(R"( + SELECT Sum(Rows) as 
Rows, Kind, Sum(RawBytes) as RawBytes, Sum(Rows) as Rows2, Sum(Rows) as Rows3, PathId + FROM `/Root/olapStore/.sys/store_primary_index_stats` + GROUP BY Kind, PathId + ORDER BY PathId, Kind, Rows3 + )"); + + auto rows = ExecuteScanQuery(tableClient, selectQuery); + UNIT_ASSERT_VALUES_EQUAL(rows.size(), 2); + UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows[0].at("Rows2")), GetUint64(rows[0].at("Rows3"))); + UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows[1].at("Rows")), GetUint64(rows[1].at("Rows3"))); + } + } + + Y_UNIT_TEST(StatsSysViewRanges) { + auto settings = TKikimrSettings().SetWithSampleTables(false); + TKikimrRunner kikimr(settings); + auto csController = NYDBTest::TControllers::RegisterCSControllerGuard(); + csController->SetCompactionControl(NYDBTest::EOptimizerCompactionWeightControl::Disable); + Tests::NCommon::TLoggerInit(kikimr).Initialize(); + + TLocalHelper(kikimr).CreateTestOlapTable("olapTable_1"); + TLocalHelper(kikimr).CreateTestOlapTable("olapTable_2"); + TLocalHelper(kikimr).CreateTestOlapTable("olapTable_3"); + + for (ui64 i = 0; i < 10; ++i) { + WriteTestData(kikimr, "/Root/olapStore/olapTable_1", 0, 1000000 + i*10000, 2000); + WriteTestData(kikimr, "/Root/olapStore/olapTable_2", 0, 1000000 + i*10000, 3000); + WriteTestData(kikimr, "/Root/olapStore/olapTable_3", 0, 1000000 + i*10000, 5000); + } + + auto tableClient = kikimr.GetTableClient(); + + { + auto selectQuery = TString(R"( + SELECT * + FROM `/Root/olapStore/.sys/store_primary_index_stats` + )"); + + auto rows = ExecuteScanQuery(tableClient, selectQuery); + } + + { + auto selectQuery = TString(R"( + SELECT PathId, Kind, TabletId + FROM `/Root/olapStore/.sys/store_primary_index_stats` + WHERE + PathId == UInt64("3") AND Activity = true + GROUP BY TabletId, PathId, Kind + ORDER BY TabletId, Kind + )"); + + auto rows = ExecuteScanQuery(tableClient, selectQuery); + + UNIT_ASSERT_VALUES_EQUAL(rows.size(), 3); + UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows[0].at("PathId")), 3ull); + 
UNIT_ASSERT_VALUES_EQUAL(GetUtf8(rows[0].at("Kind")), "INSERTED"); + UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows[1].at("PathId")), 3ull); + UNIT_ASSERT_VALUES_EQUAL(GetUtf8(rows[2].at("Kind")), "INSERTED"); + UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows[2].at("PathId")), 3ull); + UNIT_ASSERT_VALUES_EQUAL(GetUtf8(rows[1].at("Kind")), "INSERTED"); + } + + { + auto selectQuery = TString(R"( + SELECT PathId, Kind, TabletId + FROM `/Root/olapStore/.sys/store_primary_index_stats` + GROUP BY PathId, Kind, TabletId + ORDER BY PathId DESC, Kind DESC, TabletId DESC + ; + )"); + + auto rows = ExecuteScanQuery(tableClient, selectQuery); + + ui32 numExpected = 3*3; + UNIT_ASSERT_VALUES_EQUAL(rows.size(), numExpected); + UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows[0].at("PathId")), 5ull); + UNIT_ASSERT_VALUES_EQUAL(GetUtf8(rows[0].at("Kind")), "INSERTED"); + UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows[numExpected-1].at("PathId")), 3ull); + UNIT_ASSERT_VALUES_EQUAL(GetUtf8(rows[numExpected-1].at("Kind")), "INSERTED"); + } + + { + auto selectQuery = TString(R"( + SELECT PathId, Kind, TabletId + FROM `/Root/olapStore/.sys/store_primary_index_stats` + WHERE + PathId > UInt64("0") AND PathId < UInt32("4") + OR PathId > UInt64("4") AND PathId <= UInt64("5") + GROUP BY PathId, Kind, TabletId + ORDER BY + PathId DESC, Kind DESC, TabletId DESC + ; + )"); + + auto rows = ExecuteScanQuery(tableClient, selectQuery); + + ui32 numExpected = 2*3; + UNIT_ASSERT_VALUES_EQUAL(rows.size(), numExpected); + UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows[0].at("PathId")), 5ull); + UNIT_ASSERT_VALUES_EQUAL(GetUtf8(rows[0].at("Kind")), "INSERTED"); + UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows[numExpected-1].at("PathId")), 3ull); + UNIT_ASSERT_VALUES_EQUAL(GetUtf8(rows[numExpected-1].at("Kind")), "INSERTED"); + } + } + + Y_UNIT_TEST(StatsSysViewFilter) { + auto settings = TKikimrSettings().SetWithSampleTables(false); + TKikimrRunner kikimr(settings); + auto csController = NYDBTest::TControllers::RegisterCSControllerGuard(); + + 
TLocalHelper(kikimr.GetTestServer()).CreateTestOlapTable(); + for (ui64 i = 0; i < 10; ++i) { + WriteTestData(kikimr, "/Root/olapStore/olapTable", 0, 1000000 + i*10000, 2000); + } + + auto tableClient = kikimr.GetTableClient(); + + { + auto selectQuery = TString(R"( + SELECT PathId, Kind, TabletId, Sum(BlobRangeSize) as Bytes + FROM `/Root/olapStore/.sys/store_primary_index_stats` + GROUP BY PathId, Kind, TabletId + ORDER BY PathId, Kind, TabletId; + )"); + + auto rows = ExecuteScanQuery(tableClient, selectQuery); + + UNIT_ASSERT_GE(rows.size(), 3); + } + + { + auto selectQuery = TString(R"( + SELECT PathId, Kind, TabletId, Sum(BlobRangeSize) as Bytes + FROM `/Root/olapStore/.sys/store_primary_index_stats` + GROUP BY PathId, Kind, TabletId + ORDER BY PathId, Kind, TabletId; + )"); + + auto rows = ExecuteScanQuery(tableClient, selectQuery); + + UNIT_ASSERT_GE(rows.size(), 3); + } + + { + auto selectQuery = TString(R"( + SELECT * + FROM `/Root/olapStore/.sys/store_primary_index_stats` + WHERE Kind == 'EVICTED' + ORDER BY PathId, Kind, TabletId; + )"); + + auto rows = ExecuteScanQuery(tableClient, selectQuery); + + UNIT_ASSERT_GE(rows.size(), 0); + } + + { + auto selectQuery = TString(R"( + SELECT PathId, Kind, TabletId + FROM `/Root/olapStore/.sys/store_primary_index_stats` + WHERE Kind IN ('SPLIT_COMPACTED', 'INACTIVE', 'EVICTED') + GROUP BY PathId, Kind, TabletId + ORDER BY PathId, Kind, TabletId; + )"); + + auto rows = ExecuteScanQuery(tableClient, selectQuery); + + UNIT_ASSERT_GE(rows.size(), 3); + } + } + + Y_UNIT_TEST(StatsSysViewAggregation) { + auto settings = TKikimrSettings().SetWithSampleTables(false); + TKikimrRunner kikimr(settings); + auto csController = NYDBTest::TControllers::RegisterCSControllerGuard(); + + TLocalHelper(kikimr.GetTestServer()).CreateTestOlapTable("olapTable_1"); + TLocalHelper(kikimr.GetTestServer()).CreateTestOlapTable("olapTable_2"); + TLocalHelper(kikimr.GetTestServer()).CreateTestOlapTable("olapTable_3"); + + for (ui64 i = 0; i < 
100; ++i) { + WriteTestData(kikimr, "/Root/olapStore/olapTable_1", 0, 1000000 + i*10000, 1000); + WriteTestData(kikimr, "/Root/olapStore/olapTable_2", 0, 1000000 + i*10000, 2000); + WriteTestData(kikimr, "/Root/olapStore/olapTable_3", 0, 1000000 + i*10000, 3000); + } + + Tests::NCommon::TLoggerInit(kikimr).Initialize(); + + auto tableClient = kikimr.GetTableClient(); + + { + auto selectQuery = TString(R"( + SELECT + SUM(Rows) as rows, + FROM `/Root/olapStore/.sys/store_primary_index_stats` + WHERE + Kind != 'INACTIVE' + )"); + + auto rows = ExecuteScanQuery(tableClient, selectQuery); + UNIT_ASSERT_VALUES_EQUAL(rows.size(), 1ull); + } + + { + auto selectQuery = TString(R"( + SELECT + PathId, + SUM(Rows) as rows, + FROM `/Root/olapStore/.sys/store_primary_index_stats` + WHERE + Kind != 'INACTIVE' + GROUP BY + PathId + ORDER BY + PathId + )"); + + auto rows = ExecuteScanQuery(tableClient, selectQuery); + UNIT_ASSERT_VALUES_EQUAL(rows.size(), 3ull); + UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows[0].at("PathId")), 3); + UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows[1].at("PathId")), 4); + UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows[2].at("PathId")), 5); + } + + { + auto selectQuery = TString(R"( + SELECT + PathId, + SUM(Rows) as rows, + SUM(BlobRangeSize) as bytes, + SUM(RawBytes) as bytes_raw + FROM `/Root/olapStore/.sys/store_primary_index_stats` + WHERE + Kind IN ('INSERTED', 'SPLIT_COMPACTED', 'COMPACTED') + GROUP BY PathId + ORDER BY rows DESC + LIMIT 10 + )"); + + auto rows = ExecuteScanQuery(tableClient, selectQuery); + UNIT_ASSERT_VALUES_EQUAL(rows.size(), 3ull); + UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows[0].at("PathId")), 5); + UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows[1].at("PathId")), 4); + UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows[2].at("PathId")), 3); + } + + { + auto selectQuery = TString(R"( + SELECT + PathId, + SUM(Rows) as rows, + SUM(BlobRangeSize) as bytes, + SUM(RawBytes) as bytes_raw + FROM `/Root/olapStore/.sys/store_primary_index_stats` + WHERE + PathId == 
UInt64("3") AND Kind IN ('INSERTED', 'SPLIT_COMPACTED', 'COMPACTED') + GROUP BY PathId + ORDER BY rows DESC + LIMIT 10 + )"); + + auto rows = ExecuteScanQuery(tableClient, selectQuery); + UNIT_ASSERT_VALUES_EQUAL(rows.size(), 1ull); + UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows[0].at("PathId")), 3); + } + + { + auto selectQuery = TString(R"( + SELECT + PathId, + SUM(Rows) as rows, + SUM(BlobRangeSize) as bytes, + SUM(RawBytes) as bytes_raw + FROM `/Root/olapStore/.sys/store_primary_index_stats` + WHERE + PathId >= UInt64("4") AND Kind IN ('INSERTED', 'SPLIT_COMPACTED', 'COMPACTED') + GROUP BY PathId + ORDER BY rows DESC + LIMIT 10 + )"); + + auto rows = ExecuteScanQuery(tableClient, selectQuery); + UNIT_ASSERT_VALUES_EQUAL(rows.size(), 2ull); + UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows[0].at("PathId")), 5); + UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows[1].at("PathId")), 4); + } + + { + auto selectQuery = TString(R"( + SELECT PathId, TabletId, Kind + FROM `/Root/olapStore/.sys/store_primary_index_stats` + GROUP BY PathId, TabletId, Kind + )"); + + auto rows = ExecuteScanQuery(tableClient, selectQuery); + // 3 Tables with 3 Shards each and 2 KindId-s of stats + UNIT_ASSERT_VALUES_EQUAL(rows.size(), 3 * 3 * 2); + } + + { + auto selectQuery = TString(R"( + SELECT + count(distinct(PathId)), + count(distinct(Kind)), + count(distinct(TabletId)) + FROM `/Root/olapStore/.sys/store_primary_index_stats` + )"); + + auto rows = ExecuteScanQuery(tableClient, selectQuery); + UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows[0].at("column0")), 3ull); + UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows[0].at("column1")), 2); + UNIT_ASSERT_GE(GetUint64(rows[0].at("column2")), 3ull); + } + + { + auto selectQuery = TString(R"( + SELECT PathId, count(*), sum(Rows), sum(BlobRangeSize), sum(RawBytes) + FROM `/Root/olapStore/.sys/store_primary_index_stats` + GROUP BY PathId + ORDER BY PathId + )"); + + auto rows = ExecuteScanQuery(tableClient, selectQuery); + UNIT_ASSERT_VALUES_EQUAL(rows.size(), 3ull); + for (ui64 
pathId = 3, row = 0; pathId <= 5; ++pathId, ++row) { + UNIT_ASSERT_VALUES_EQUAL(GetUint64(rows[row].at("PathId")), pathId); + } + } + } +} + +} // namespace diff --git a/ydb/core/kqp/ut/olap/write_ut.cpp b/ydb/core/kqp/ut/olap/write_ut.cpp new file mode 100644 index 000000000000..42310b02eac9 --- /dev/null +++ b/ydb/core/kqp/ut/olap/write_ut.cpp @@ -0,0 +1,111 @@ +#include "helpers/local.h" +#include "helpers/writer.h" + +#include +#include +#include +#include + +namespace NKikimr::NKqp { + +Y_UNIT_TEST_SUITE(KqpOlapWrite) { + Y_UNIT_TEST(TierDraftsGC) { + auto csController = NKikimr::NYDBTest::TControllers::RegisterCSControllerGuard(); + csController->SetIndexWriteControllerEnabled(false); + csController->SetPeriodicWakeupActivationPeriod(TDuration::Seconds(1)); + Singleton()->ResetWriteCounters(); + + auto settings = TKikimrSettings() + .SetWithSampleTables(false) + .SetForceColumnTablesCompositeMarks(true); + TKikimrRunner kikimr(settings); + TLocalHelper(kikimr).CreateTestOlapTable(); + Tests::NCommon::TLoggerInit(kikimr).SetComponents({NKikimrServices::TX_COLUMNSHARD}, "CS").SetPriority(NActors::NLog::PRI_DEBUG).Initialize(); + auto tableClient = kikimr.GetTableClient(); + + { + WriteTestData(kikimr, "/Root/olapStore/olapTable", 30000, 1000000, 11000); + } + while (csController->GetInsertStartedCounter().Val() == 0) { + Cout << "Wait indexation..." << Endl; + Sleep(TDuration::Seconds(2)); + } + while (!Singleton()->GetWritesCount() || !csController->GetIndexWriteControllerBrokeCount().Val()) { + Cout << "Wait errors on write... 
" << Singleton()->GetWritesCount() << "/" << csController->GetIndexWriteControllerBrokeCount().Val() << Endl; + Sleep(TDuration::Seconds(2)); + } + csController->DisableBackground(NKikimr::NYDBTest::ICSController::EBackground::Indexation); + csController->DisableBackground(NKikimr::NYDBTest::ICSController::EBackground::Compaction); + const auto startInstant = TMonotonic::Now(); + while (Singleton()->GetSize() && TMonotonic::Now() - startInstant < TDuration::Seconds(200)) { + Cerr << "Waiting empty... " << Singleton()->GetSize() << Endl; + Sleep(TDuration::Seconds(2)); + } + + AFL_VERIFY(!Singleton()->GetSize()); + } + + Y_UNIT_TEST(TierDraftsGCWithRestart) { + auto csController = NKikimr::NYDBTest::TControllers::RegisterCSControllerGuard(); + csController->SetIndexWriteControllerEnabled(false); + csController->SetPeriodicWakeupActivationPeriod(TDuration::Seconds(1000)); + csController->DisableBackground(NKikimr::NYDBTest::ICSController::EBackground::GC); + Singleton()->ResetWriteCounters(); + + auto settings = TKikimrSettings() + .SetWithSampleTables(false) + .SetForceColumnTablesCompositeMarks(true); + TKikimrRunner kikimr(settings); + TLocalHelper(kikimr).CreateTestOlapTable(); + Tests::NCommon::TLoggerInit(kikimr).SetComponents({NKikimrServices::TX_COLUMNSHARD}, "CS").SetPriority(NActors::NLog::PRI_DEBUG).Initialize(); + auto tableClient = kikimr.GetTableClient(); + + { + WriteTestData(kikimr, "/Root/olapStore/olapTable", 30000, 1000000, 11000); + } + while (csController->GetInsertStartedCounter().Val() == 0) { + Cout << "Wait indexation..." << Endl; + Sleep(TDuration::Seconds(2)); + } + while (Singleton()->GetWritesCount() < 20 || !csController->GetIndexWriteControllerBrokeCount().Val()) { + Cout << "Wait errors on write... 
" << Singleton()->GetWritesCount() << "/" << csController->GetIndexWriteControllerBrokeCount().Val() << Endl; + Sleep(TDuration::Seconds(2)); + } + csController->DisableBackground(NKikimr::NYDBTest::ICSController::EBackground::Indexation); + csController->DisableBackground(NKikimr::NYDBTest::ICSController::EBackground::Compaction); + AFL_VERIFY(Singleton()->GetSize()); + { + const auto startInstant = TMonotonic::Now(); + AFL_VERIFY(Singleton()->GetDeletesCount() == 0)("count", Singleton()->GetDeletesCount()); + while (Singleton()->GetSize() && TMonotonic::Now() - startInstant < TDuration::Seconds(200)) { + for (auto&& i : csController->GetShardActualIds()) { + kikimr.GetTestServer().GetRuntime()->Send(MakePipePeNodeCacheID(false), NActors::TActorId(), new TEvPipeCache::TEvForward( + new TEvents::TEvPoisonPill(), i, false)); + } + csController->EnableBackground(NKikimr::NYDBTest::ICSController::EBackground::GC); + Cerr << "Waiting empty... " << Singleton()->GetSize() << Endl; + Sleep(TDuration::Seconds(2)); + } + } + + { + const auto startInstant = TMonotonic::Now(); + while (TMonotonic::Now() - startInstant < TDuration::Seconds(10)) { + for (auto&& i : csController->GetShardActualIds()) { + kikimr.GetTestServer().GetRuntime()->Send(MakePipePeNodeCacheID(false), NActors::TActorId(), new TEvPipeCache::TEvForward( + new TEvents::TEvPoisonPill(), i, false)); + } + Cerr << "Waiting empty... 
" << Singleton()->GetWritesCount() << "/" << Singleton()->GetDeletesCount() << Endl; + Sleep(TDuration::MilliSeconds(500)); + } + } + + AFL_VERIFY(!Singleton()->GetSize()); + const auto writesCount = Singleton()->GetWritesCount(); + const auto deletesCount = Singleton()->GetDeletesCount(); + AFL_VERIFY(deletesCount <= writesCount + 1)("writes", writesCount)("deletes", deletesCount); + } + +} + +} // namespace diff --git a/ydb/core/kqp/ut/olap/ya.make b/ydb/core/kqp/ut/olap/ya.make index 8dfe183405bd..f359312ebdbb 100644 --- a/ydb/core/kqp/ut/olap/ya.make +++ b/ydb/core/kqp/ut/olap/ya.make @@ -14,7 +14,14 @@ ENDIF() SRCS( kqp_olap_stats_ut.cpp - kqp_olap_ut.cpp + GLOBAL kqp_olap_ut.cpp + sys_view_ut.cpp + indexes_ut.cpp + GLOBAL blobs_sharing_ut.cpp + statistics_ut.cpp + clickbench_ut.cpp + aggregations_ut.cpp + write_ut.cpp ) PEERDIR( @@ -22,6 +29,7 @@ PEERDIR( ydb/core/kqp/ut/common ydb/library/yql/sql/pg_dummy ydb/core/tx/columnshard/hooks/testing + ydb/core/kqp/ut/olap/helpers ydb/core/tx/datashard/ut_common ) diff --git a/ydb/core/kqp/ut/scheme/kqp_scheme_ut.cpp b/ydb/core/kqp/ut/scheme/kqp_scheme_ut.cpp index 3b7692a6ad30..d68616aecab4 100644 --- a/ydb/core/kqp/ut/scheme/kqp_scheme_ut.cpp +++ b/ydb/core/kqp/ut/scheme/kqp_scheme_ut.cpp @@ -1,5 +1,6 @@ #include #include +#include #include #include #include @@ -3994,7 +3995,7 @@ Y_UNIT_TEST_SUITE(KqpScheme) { auto query = TStringBuilder() << R"( --!syntax_v1 CREATE TABLESTORE `)" << tableStoreName << R"(` ( - Key Uint64, + Key Uint64 NOT NULL, Value1 String, PRIMARY KEY (Key) ) @@ -4034,7 +4035,7 @@ Y_UNIT_TEST_SUITE(KqpScheme) { auto query = TStringBuilder() << R"( --!syntax_v1 CREATE TABLE `)" << tableName << R"(` ( - Key Uint64, + Key Uint64 NOT NULL, Value1 String, PRIMARY KEY (Key) ) @@ -4049,7 +4050,7 @@ Y_UNIT_TEST_SUITE(KqpScheme) { auto query2 = TStringBuilder() << R"( --!syntax_v1 CREATE TABLESTORE `)" << tableStoreName << R"(` ( - Key Uint64, + Key Uint64 NOT NULL, Value1 String, PRIMARY KEY (Key) ) @@ 
-4145,7 +4146,7 @@ Y_UNIT_TEST_SUITE(KqpScheme) { auto query = TStringBuilder() << R"( --!syntax_v1 CREATE TABLESTORE `)" << tableStoreName << R"(` ( - Key Uint64, + Key Uint64 NOT NULL, Value1 String, PRIMARY KEY (Key) ) @@ -4160,7 +4161,7 @@ Y_UNIT_TEST_SUITE(KqpScheme) { auto query2 = TStringBuilder() << R"( --!syntax_v1 CREATE TABLE `)" << tableName << R"(` ( - Key Uint64, + Key Uint64 NOT NULL, Value1 String, PRIMARY KEY (Key) ) @@ -5129,6 +5130,37 @@ Y_UNIT_TEST_SUITE(KqpScheme) { Y_UNIT_TEST_SUITE(KqpOlapScheme) { + Y_UNIT_TEST(DropTable) { + TKikimrSettings runnerSettings; + runnerSettings.WithSampleTables = false; + TTestHelper testHelper(runnerSettings); + + TVector schema = { + TTestHelper::TColumnSchema().SetName("id").SetType(NScheme::NTypeIds::Int32).SetNullable(false), + TTestHelper::TColumnSchema().SetName("id_second").SetType(NScheme::NTypeIds::Int32).SetNullable(false), + TTestHelper::TColumnSchema().SetName("level").SetType(NScheme::NTypeIds::Int32), + TTestHelper::TColumnSchema().SetName("created_at").SetType(NScheme::NTypeIds::Timestamp).SetNullable(false) + }; + + TTestHelper::TColumnTable testTable; + testTable + .SetName("/Root/ColumnTableTest") + .SetPrimaryKey({ "id", "id_second" }) + .SetSharding({ "id" }) + .SetMinPartitionsCount(16) + .SetSchema(schema); + testHelper.CreateTable(testTable); + auto sender = testHelper.GetRuntime().AllocateEdgeActor(); + auto tabletIds = GetColumnTableShards(&testHelper.GetKikimr().GetTestServer(), sender, "/Root/ColumnTableTest"); + for (auto tablet: tabletIds) { + UNIT_ASSERT_C(testHelper.GetKikimr().GetTestClient().TabletExistsInHive(&testHelper.GetRuntime(), tablet), ToString(tablet) + " not alive"); + } + testHelper.DropTable("/Root/ColumnTableTest"); + for (auto tablet: tabletIds) { + UNIT_ASSERT_C(!testHelper.GetKikimr().GetTestClient().TabletExistsInHive(&testHelper.GetRuntime(), tablet), ToString(tablet) + " is alive"); + } + } + Y_UNIT_TEST(AddColumnLongPk) { TKikimrSettings runnerSettings; 
runnerSettings.WithSampleTables = false; @@ -5210,6 +5242,52 @@ Y_UNIT_TEST_SUITE(KqpOlapScheme) { } } + Y_UNIT_TEST(InvalidColumnInTieringRule) { + auto csController = NYDBTest::TControllers::RegisterCSControllerGuard(); + + TKikimrSettings runnerSettings; + runnerSettings.WithSampleTables = false; + TTestHelper testHelper(runnerSettings); + Tests::NCommon::TLoggerInit(testHelper.GetKikimr()).Initialize(); + + const TString tableName = "/Root/ColumnTableTest"; + + TVector schema = { + TTestHelper::TColumnSchema().SetName("id").SetType(NScheme::NTypeIds::Int32).SetNullable(false), + TTestHelper::TColumnSchema().SetName("id_second").SetType(NScheme::NTypeIds::Int32).SetNullable(false), + TTestHelper::TColumnSchema().SetName("level").SetType(NScheme::NTypeIds::Int32), + TTestHelper::TColumnSchema().SetName("created_at").SetType(NScheme::NTypeIds::Timestamp).SetNullable(false) + }; + + TTestHelper::TColumnTable testTable; + testTable.SetName(tableName).SetPrimaryKey({"id", "id_second"}).SetSharding({"id"}).SetSchema(schema).SetTTL("created_at", "Interval(\"PT1H\")"); + testHelper.CreateTable(testTable); + testHelper.CreateTier("tier1"); + + { + TTestHelper::TUpdatesBuilder tableInserter(testTable.GetArrowSchema(schema)); + tableInserter.AddRow().Add(1).Add(1).Add(7).Add((TInstant::Now() - TDuration::Days(30)).MilliSeconds()); + tableInserter.AddRow().Add(1).Add(2).Add(7).Add((TInstant::Now() - TDuration::Days(30)).MilliSeconds()); + testHelper.BulkUpsert(testTable, tableInserter); + } + + while (csController->GetInsertFinishedCounter().Val() == 0) { + Cout << "Wait indexation..." << Endl; + Sleep(TDuration::Seconds(2)); + } + + // const auto ruleName = testHelper.CreateTieringRule("tier1", "created_att"); + const auto ruleName = testHelper.CreateTieringRule("tier1", "created_at"); + testHelper.SetTiering(tableName, ruleName); + + while (csController->GetTieringUpdates().Val() == 0) { + Cout << "Wait tiering..." 
<< Endl; + Sleep(TDuration::Seconds(2)); + } + + testHelper.RebootTablets(tableName); + } + Y_UNIT_TEST(AddColumnWithTtl) { TKikimrSettings runnerSettings; runnerSettings.WithSampleTables = false; @@ -5242,7 +5320,6 @@ Y_UNIT_TEST_SUITE(KqpOlapScheme) { UNIT_ASSERT_VALUES_EQUAL(description.GetTtlSettings()->GetDateTypeColumn().GetExpireAfter(), TDuration::Hours(1)); } { - schema.push_back(TTestHelper::TColumnSchema().SetName("new_column").SetType(NScheme::NTypeIds::Uint64)); auto alterQuery = TStringBuilder() << "ALTER TABLE `" << testTable.GetName() << "` ADD COLUMN new_column Uint64;"; auto alterResult = testHelper.GetSession().ExecuteSchemeQuery(alterQuery).GetValueSync(); UNIT_ASSERT_VALUES_EQUAL_C(alterResult.GetStatus(), EStatus::SUCCESS, alterResult.GetIssues().ToString()); @@ -5257,12 +5334,7 @@ Y_UNIT_TEST_SUITE(KqpOlapScheme) { UNIT_ASSERT_VALUES_EQUAL(columns.size(), 5); UNIT_ASSERT_VALUES_EQUAL(description.GetTtlSettings()->GetDateTypeColumn().GetExpireAfter(), TDuration::Hours(1)); } - { - schema.push_back(TTestHelper::TColumnSchema().SetName("new_column").SetType(NScheme::NTypeIds::Uint64)); - auto alterQuery = TStringBuilder() << "ALTER TABLE `" << testTable.GetName() << R"(` SET(TIERING = 'tiering1');)"; - auto alterResult = testHelper.GetSession().ExecuteSchemeQuery(alterQuery).GetValueSync(); - UNIT_ASSERT_VALUES_EQUAL_C(alterResult.GetStatus(), EStatus::SUCCESS, alterResult.GetIssues().ToString()); - } + testHelper.SetTiering("/Root/ColumnTableTest", "tiering1"); { auto settings = TDescribeTableSettings().WithTableStatistics(true); auto describeResult = testHelper.GetSession().DescribeTable("/Root/ColumnTableTest", settings).GetValueSync(); @@ -5274,7 +5346,6 @@ Y_UNIT_TEST_SUITE(KqpOlapScheme) { UNIT_ASSERT_VALUES_EQUAL(description.GetTtlSettings()->GetDateTypeColumn().GetExpireAfter(), TDuration::Hours(1)); } { - schema.push_back(TTestHelper::TColumnSchema().SetName("new_column").SetType(NScheme::NTypeIds::Uint64)); auto alterQuery = 
TStringBuilder() << "ALTER TABLE `" << testTable.GetName() << R"(` RESET (TTL);)"; auto alterResult = testHelper.GetSession().ExecuteSchemeQuery(alterQuery).GetValueSync(); UNIT_ASSERT_VALUES_EQUAL_C(alterResult.GetStatus(), EStatus::SUCCESS, alterResult.GetIssues().ToString()); @@ -5289,12 +5360,7 @@ Y_UNIT_TEST_SUITE(KqpOlapScheme) { UNIT_ASSERT_VALUES_EQUAL(*description.GetTiering(), "tiering1"); UNIT_ASSERT(!description.GetTtlSettings()); } - { - schema.push_back(TTestHelper::TColumnSchema().SetName("new_column").SetType(NScheme::NTypeIds::Uint64)); - auto alterQuery = TStringBuilder() << "ALTER TABLE `" << testTable.GetName() << R"(` RESET (TIERING);)"; - auto alterResult = testHelper.GetSession().ExecuteSchemeQuery(alterQuery).GetValueSync(); - UNIT_ASSERT_VALUES_EQUAL_C(alterResult.GetStatus(), EStatus::SUCCESS, alterResult.GetIssues().ToString()); - } + testHelper.ResetTiering("/Root/ColumnTableTest"); { auto settings = TDescribeTableSettings().WithTableStatistics(true); auto describeResult = testHelper.GetSession().DescribeTable("/Root/ColumnTableTest", settings).GetValueSync(); @@ -5368,7 +5434,7 @@ Y_UNIT_TEST_SUITE(KqpOlapScheme) { TTestHelper::TColumnSchema().SetName("resource_id").SetType(NScheme::NTypeIds::Utf8), TTestHelper::TColumnSchema().SetName("level").SetType(NScheme::NTypeIds::Int32) }; - + Tests::NCommon::TLoggerInit(testHelper.GetKikimr()).Initialize(); TTestHelper::TColumnTable testTable; @@ -5533,8 +5599,8 @@ Y_UNIT_TEST_SUITE(KqpOlapScheme) { testHelper.ReadData("SELECT resource_id FROM `/Root/TableStoreTest/ColumnTableTest` WHERE id=3", "[[[\"test_res_3\"]]]"); testHelper.ReadData("SELECT new_column FROM `/Root/TableStoreTest/ColumnTableTest`", "[[#];[#];[[200u]]]"); - // testHelper.RebootTablets(testTable.GetName()); - // testHelper.ReadData("SELECT new_column FROM `/Root/TableStoreTest/ColumnTableTest`", "[[#];[#];[[200u]]]"); + testHelper.RebootTablets(testTable.GetName()); + testHelper.ReadData("SELECT new_column FROM 
`/Root/TableStoreTest/ColumnTableTest`", "[[#];[#];[[200u]]]"); } Y_UNIT_TEST(AddColumnErrors) { @@ -5593,6 +5659,19 @@ Y_UNIT_TEST_SUITE(KqpOlapScheme) { testHelper.ReadData("SELECT * FROM `/Root/ColumnTableTest` WHERE id=1", "[]"); } + Y_UNIT_TEST(BulkError) { + TKikimrSettings runnerSettings; + runnerSettings.WithSampleTables = false; + TTestHelper testHelper(runnerSettings); + + TVector schema = { + TTestHelper::TColumnSchema().SetName("id").SetType(NScheme::NTypeIds::Int32).SetNullable(false), + TTestHelper::TColumnSchema().SetName("level").SetType(NScheme::NTypeIds::Uuid).SetNullable(true) + }; + TTestHelper::TColumnTable testTable; + testTable.SetName("/Root/ColumnTableTest").SetPrimaryKey({"id"}).SetSharding({"id"}).SetSchema(schema); + testHelper.CreateTable(testTable, NYdb::EStatus::SCHEME_ERROR); + } Y_UNIT_TEST(DropColumn) { TKikimrSettings runnerSettings; runnerSettings.WithSampleTables = false; @@ -5840,6 +5919,26 @@ Y_UNIT_TEST_SUITE(KqpOlapScheme) { } testHelper.ReadData("SELECT COUNT(*) FROM `/Root/ColumnTableTest`", "[[10000u]]"); } + + Y_UNIT_TEST(NullKeySchema) { + TKikimrSettings runnerSettings; + runnerSettings.WithSampleTables = false; + TTestHelper testHelper(runnerSettings); + + TVector schema = { + TTestHelper::TColumnSchema().SetName("id").SetType(NScheme::NTypeIds::Int32).SetNullable(true), + TTestHelper::TColumnSchema().SetName("resource_id").SetType(NScheme::NTypeIds::Utf8).SetNullable(false), + TTestHelper::TColumnSchema().SetName("level").SetType(NScheme::NTypeIds::Int32).SetNullable(false) + }; + TTestHelper::TColumnTableStore testTableStore; + testTableStore.SetName("/Root/TableStoreTest").SetPrimaryKey({"id"}).SetSchema(schema); + testHelper.CreateTable(testTableStore, EStatus::SCHEME_ERROR); + + TTestHelper::TColumnTable testTable; + testTable.SetName("/Root/ColumnTableTest").SetPrimaryKey({"id"}).SetSchema(schema); + testHelper.CreateTable(testTable, EStatus::SCHEME_ERROR); + } + } Y_UNIT_TEST_SUITE(KqpOlapTypes) { diff --git 
a/ydb/core/kqp/ut/scheme/ya.make b/ydb/core/kqp/ut/scheme/ya.make index d086e49f2bcf..4b73a38eb2ea 100644 --- a/ydb/core/kqp/ut/scheme/ya.make +++ b/ydb/core/kqp/ut/scheme/ya.make @@ -23,6 +23,7 @@ PEERDIR( ydb/core/kqp ydb/core/kqp/ut/common ydb/library/yql/sql/pg_dummy + ydb/core/tx/columnshard/hooks/testing ) YQL_LAST_ABI_VERSION() diff --git a/ydb/core/protos/config.proto b/ydb/core/protos/config.proto index ec5ce939a55f..c4b01265d7da 100644 --- a/ydb/core/protos/config.proto +++ b/ydb/core/protos/config.proto @@ -1481,7 +1481,6 @@ message TColumnShardConfig { TMinimalTablesCountPolicy MinimalTables = 1; TIdentityGroupsPolicy IdentityGroups = 2; } - } optional TTablesStorageLayoutPolicy TablesStorageLayoutPolicy = 1; optional bool DisabledOnSchemeShard = 2 [default = true]; @@ -1494,6 +1493,15 @@ message TColumnShardConfig { optional bool UseChunkedMergeOnCompaction = 9 [default = true]; optional uint64 CompactionMemoryLimit = 10 [default = 536870912]; optional uint64 TieringsMemoryLimit = 11 [default = 536870912]; + message TIndexMetadataMemoryLimit { + oneof Value { + double TotalRatio = 1 [default = 0.3]; + uint64 AbsoluteValue = 2; + } + } + + optional TIndexMetadataMemoryLimit IndexMetadataMemoryLimit = 12; + optional bool CleanupEnabled = 13 [default = true]; } message TSchemeShardConfig { diff --git a/ydb/core/protos/counters_columnshard.proto b/ydb/core/protos/counters_columnshard.proto index 428621d0f20b..b2002ee2cca8 100644 --- a/ydb/core/protos/counters_columnshard.proto +++ b/ydb/core/protos/counters_columnshard.proto @@ -49,6 +49,7 @@ enum ESimpleCounters { COUNTER_EVICTED_BYTES = 39 [(CounterOpts) = {Name: "Index/EvictedBytes"}]; COUNTER_EVICTED_RAW_BYTES = 40 [(CounterOpts) = {Name: "Index/EvictedBytesRaw"}]; COUNTER_WRITES_IN_FLY = 41 [(CounterOpts) = {Name: "WritesInFly"}]; + COUNTER_TX_COMPLETE_LAG = 42 [(CounterOpts) = {Name: "TxCompleteLag"}]; } enum ECumulativeCounters { @@ -177,5 +178,16 @@ enum ETxTypes { TXTYPE_WRITE_DRAFT = 15 
[(TxTypeOpts) = {Name: "TxWriteDraft"}]; TXTYPE_CLEANUP_INSERT_TABLE = 16 [(TxTypeOpts) = {Name: "TxInsertTableCleanup"}]; TXTYPE_GC_FINISHED = 17 [(TxTypeOpts) = {Name: "TxGarbageCollectionFinished"}]; - + TXTYPE_DELETE_SHARED_BLOBS = 18 [(TxTypeOpts) = {Name: "TxDeleteSharedBlobs"}]; + TXTYPE_DATA_SHARING_FINISH_ACK_TO_SOURCE = 19 [(TxTypeOpts) = {Name: "TxDataSharingFinishAckToSource"}]; + TXTYPE_DATA_SHARING_START_TO_SOURCE = 20 [(TxTypeOpts) = {Name: "TxDataSharingStartToSource"}]; + TXTYPE_DATA_SHARING_DATA_ACK_TO_SOURCE = 21 [(TxTypeOpts) = {Name: "TxDataSharingDataAckToSource"}]; + TXTYPE_DATA_SHARING_DATA_FROM_SOURCE = 22 [(TxTypeOpts) = {Name: "TxDataSharingDataFromSource"}]; + TXTYPE_DATA_SHARING_FINISH_ACK_FROM_INITIATOR = 23 [(TxTypeOpts) = {Name: "TxDataSharingFinishAckFromInitiator"}]; + TXTYPE_DATA_SHARING_FINISH_FROM_SOURCE = 24 [(TxTypeOpts) = {Name: "TxDataSharingFinishFromSource"}]; + TXTYPE_DATA_SHARING_PROPOSE_FROM_INITIATOR = 25 [(TxTypeOpts) = {Name: "TxDataSharingProposeFromInitiator"}]; + TXTYPE_DATA_SHARING_CONFIRM_FROM_INITIATOR = 26 [(TxTypeOpts) = {Name: "TxDataSharingConfirmFromInitiator"}]; + TXTYPE_DATA_SHARING_APPLY_LINKS_MODIFICATION = 27 [(TxTypeOpts) = {Name: "TxDataSharingApplyLinksModification"}]; + TXTYPE_DATA_SHARING_WRITE_SOURCE_CURSOR = 28 [(TxTypeOpts) = {Name: "TxDataSharingWriteSourceCursor"}]; + TXTYPE_EXPORT_SAVE_CURSOR = 29 [(TxTypeOpts) = {Name: "TxExportSaveCursor"}]; } diff --git a/ydb/core/protos/flat_scheme_op.proto b/ydb/core/protos/flat_scheme_op.proto index d20f3b253de6..399eda8924f0 100644 --- a/ydb/core/protos/flat_scheme_op.proto +++ b/ydb/core/protos/flat_scheme_op.proto @@ -17,6 +17,7 @@ import "ydb/public/api/protos/ydb_value.proto"; import "ydb/library/actors/protos/actors.proto"; import "ydb/library/mkql_proto/protos/minikql.proto"; import "ydb/core/protos/index_builder.proto"; +import "ydb/core/tx/columnshard/engines/scheme/statistics/protos/data.proto"; import "google/protobuf/empty.proto"; @@ 
-418,6 +419,7 @@ message TOlapColumnDiff { optional string Name = 1; optional TDictionaryEncodingSettings DictionaryEncoding = 4; optional TOlapColumn.TSerializer Serializer = 5; + optional string StorageId = 6; } message TOlapColumnDescription { @@ -435,6 +437,7 @@ message TOlapColumnDescription { optional TCompressionOptions Compression = 8[deprecated = true]; optional TDictionaryEncodingSettings DictionaryEncoding = 9; optional TOlapColumn.TSerializer Serializer = 10; + optional string StorageId = 11; } message TRequestedBloomFilter { @@ -445,8 +448,9 @@ message TRequestedBloomFilter { message TOlapIndexRequested { optional string Name = 1; optional TCompressionOptions Compression = 3; + optional string StorageId = 4; - optional string ClassName = 2; + optional string ClassName = 39; oneof Implementation { TRequestedBloomFilter BloomFilter = 40; } @@ -465,9 +469,11 @@ message TOlapIndexDescription { optional string Name = 2; optional TCompressionOptions Compression = 3; - optional string ClassName = 4; + optional string StorageId = 4; + + optional string ClassName = 40; oneof Implementation { - TBloomFilter BloomFilter = 40; + TBloomFilter BloomFilter = 41; } } @@ -487,6 +493,10 @@ message TStorageTierConfig { optional TCompressionOptions Compression = 3; } +message TColumnTableSchemeOptions { + optional bool SchemeNeedActualization = 1 [default = false]; +} + message TColumnTableSchema { // A list of columns for tables with this schema repeated TOlapColumnDescription Columns = 1; @@ -507,6 +517,12 @@ message TColumnTableSchema { optional bool CompositeMarks = 9 [ default = false ]; repeated TOlapIndexDescription Indexes = 10; + repeated NKikimrColumnShardStatisticsProto.TOperatorContainer Statistics = 11; + optional TColumnTableSchemeOptions Options = 12; +} + +message TColumnTableRequestedOptions { + optional bool SchemeNeedActualization = 1 [default = false]; } message TAlterColumnTableSchema { @@ -516,6 +532,9 @@ message TAlterColumnTableSchema { repeated 
TOlapColumnDiff AlterColumns = 7; repeated TOlapIndexRequested UpsertIndexes = 8; repeated string DropIndexes = 9; + repeated NKikimrColumnShardStatisticsProto.TConstructorContainer UpsertStatistics = 10; + repeated string DropStatistics = 11; + optional TColumnTableRequestedOptions Options = 12; } // Schema presets are used to manage multiple tables with the same schema @@ -1000,6 +1019,9 @@ message TS3Settings { optional uint32 RequestTimeoutMs = 101; optional uint32 HttpRequestTimeoutMs = 102; optional uint32 ConnectionTimeoutMs = 103; + + optional uint32 ExecutorThreadsCount = 104 [default = 32]; + optional uint32 MaxConnectionsCount = 105 [default = 32]; }; message TTaskCleaner { diff --git a/ydb/core/protos/tx_columnshard.proto b/ydb/core/protos/tx_columnshard.proto index 1700172483fa..2caec98084ab 100644 --- a/ydb/core/protos/tx_columnshard.proto +++ b/ydb/core/protos/tx_columnshard.proto @@ -1,4 +1,5 @@ import "ydb/library/actors/protos/actors.proto"; +import "ydb/core/tx/columnshard/engines/scheme/statistics/protos/data.proto"; import "ydb/core/protos/flat_scheme_op.proto"; import "ydb/core/protos/long_tx_service.proto"; import "ydb/core/protos/subdomains.proto"; @@ -128,6 +129,7 @@ enum ETransactionKind { TX_KIND_TTL = 3; // Immediate (not planned) TX_KIND_DATA = 4; TX_KIND_COMMIT_WRITE = 5; + TX_KIND_BACKUP = 6; } enum ETransactionFlag { @@ -181,6 +183,14 @@ message TCommitTxBody { repeated uint64 WriteIds = 2; } +message TBackupTxBody { + optional NKikimrSchemeOp.TBackupTask BackupTask = 1; +} + +message TCommitWriteTxBody { + optional uint64 LockId = 1; +} + message TSchemaPresetVersionInfo { optional uint64 Id = 1; optional uint64 SinceStep = 2; @@ -277,6 +287,7 @@ message TIndexPortionMeta { optional bytes PrimaryKeyBorders = 6; // arrow::RecordBatch with first and last ReplaceKey rows optional TSnapshot RecordSnapshotMin = 7; optional TSnapshot RecordSnapshotMax = 8; + optional NKikimrColumnShardStatisticsProto.TPortionStorage StatisticsStorage = 9; 
} message TIndexColumnMeta { diff --git a/ydb/core/protos/ya.make b/ydb/core/protos/ya.make index 13c7f54a784f..014ae2e924ac 100644 --- a/ydb/core/protos/ya.make +++ b/ydb/core/protos/ya.make @@ -158,6 +158,7 @@ PEERDIR( ydb/library/yql/public/types ydb/library/services ydb/library/ydb_issue/proto + ydb/core/tx/columnshard/engines/scheme/statistics/protos ) EXCLUDE_TAGS(GO_PROTO) diff --git a/ydb/core/sys_view/common/schema.cpp b/ydb/core/sys_view/common/schema.cpp index 80ef81d4dec3..66c0d9192327 100644 --- a/ydb/core/sys_view/common/schema.cpp +++ b/ydb/core/sys_view/common/schema.cpp @@ -215,7 +215,11 @@ class TSystemViewResolver : public ISystemViewResolver { RegisterSystemView(QueryMetricsName); RegisterOlapStoreSystemView(StorePrimaryIndexStatsName); + RegisterOlapStoreSystemView(StorePrimaryIndexPortionStatsName); + RegisterOlapStoreSystemView(StorePrimaryIndexGranuleStatsName); RegisterColumnTableSystemView(TablePrimaryIndexStatsName); + RegisterColumnTableSystemView(TablePrimaryIndexPortionStatsName); + RegisterColumnTableSystemView(TablePrimaryIndexGranuleStatsName); RegisterSystemView(TopPartitions1MinuteName); RegisterSystemView(TopPartitions1HourName); diff --git a/ydb/core/sys_view/common/schema.h b/ydb/core/sys_view/common/schema.h index a3ef043fadc8..c2fbbc3a9f04 100644 --- a/ydb/core/sys_view/common/schema.h +++ b/ydb/core/sys_view/common/schema.h @@ -31,7 +31,11 @@ constexpr TStringBuf TabletsName = "hive_tablets"; constexpr TStringBuf QueryMetricsName = "query_metrics_one_minute"; constexpr TStringBuf StorePrimaryIndexStatsName = "store_primary_index_stats"; +constexpr TStringBuf StorePrimaryIndexPortionStatsName = "store_primary_index_portion_stats"; +constexpr TStringBuf StorePrimaryIndexGranuleStatsName = "store_primary_index_granule_stats"; constexpr TStringBuf TablePrimaryIndexStatsName = "primary_index_stats"; +constexpr TStringBuf TablePrimaryIndexPortionStatsName = "primary_index_portion_stats"; +constexpr TStringBuf 
TablePrimaryIndexGranuleStatsName = "primary_index_granule_stats"; constexpr TStringBuf TopPartitions1MinuteName = "top_partitions_one_minute"; constexpr TStringBuf TopPartitions1HourName = "top_partitions_one_hour"; @@ -463,6 +467,89 @@ struct Schema : NIceDb::Schema { IndexSize, InFlightTxCount>; }; + + struct QuerySessions : Table<13> { + struct SessionId : Column<1, NScheme::NTypeIds::Utf8> {}; + struct NodeId : Column<2, NScheme::NTypeIds::Uint32> {}; + struct State : Column<3, NScheme::NTypeIds::Utf8> {}; + struct Query : Column<4, NScheme::NTypeIds::Utf8> {}; + struct QueryCount : Column<5, NScheme::NTypeIds::Uint32> {}; + struct ClientAddress : Column<6, NScheme::NTypeIds::Utf8> {}; + struct ClientPID : Column<7, NScheme::NTypeIds::Utf8> {}; + struct ClientUserAgent : Column<8, NScheme::NTypeIds::Utf8> {}; + struct ClientSdkBuildInfo : Column<9, NScheme::NTypeIds::Utf8> {}; + struct ApplicationName : Column<10, NScheme::NTypeIds::Utf8> {}; + struct SessionStartAt : Column<11, NScheme::NTypeIds::Timestamp> {}; + struct QueryStartAt : Column<12, NScheme::NTypeIds::Timestamp> {}; + struct StateChangeAt : Column<13, NScheme::NTypeIds::Timestamp> {}; + struct UserSID : Column<14, NScheme::NTypeIds::Utf8> {}; + + using TKey = TableKey; + using TColumns = TableColumns< + SessionId, + NodeId, + State, + Query, + QueryCount, + ClientAddress, + ClientPID, + ClientUserAgent, + ClientSdkBuildInfo, + ApplicationName, + SessionStartAt, + QueryStartAt, + StateChangeAt, + UserSID>; + }; + + struct PrimaryIndexPortionStats: Table<14> { + struct PathId: Column<1, NScheme::NTypeIds::Uint64> {}; + struct Kind: Column<2, NScheme::NTypeIds::Utf8> {}; + struct TabletId: Column<3, NScheme::NTypeIds::Uint64> {}; + struct Rows: Column<4, NScheme::NTypeIds::Uint64> {}; + struct ColumnRawBytes: Column<5, NScheme::NTypeIds::Uint64> {}; + struct IndexRawBytes: Column<6, NScheme::NTypeIds::Uint64> {}; + struct ColumnBlobBytes: Column<7, NScheme::NTypeIds::Uint64> {}; + struct 
IndexBlobBytes: Column<8, NScheme::NTypeIds::Uint64> {}; + struct PortionId: Column<9, NScheme::NTypeIds::Uint64> {}; + struct Activity: Column<10, NScheme::NTypeIds::Bool> {}; + struct TierName: Column<11, NScheme::NTypeIds::Utf8> {}; + struct Stats: Column<12, NScheme::NTypeIds::Utf8> {}; + + using TKey = TableKey; + using TColumns = TableColumns< + PathId, + Kind, + TabletId, + Rows, + ColumnRawBytes, + IndexRawBytes, + ColumnBlobBytes, + IndexBlobBytes, + PortionId, + Activity, + TierName, + Stats + >; + }; + + struct PrimaryIndexGranuleStats: Table<14> { + struct PathId: Column<1, NScheme::NTypeIds::Uint64> {}; + struct TabletId: Column<2, NScheme::NTypeIds::Uint64> {}; + struct PortionsCount: Column<3, NScheme::NTypeIds::Uint64> {}; + struct HostName: Column<4, NScheme::NTypeIds::Utf8> {}; + struct NodeId: Column<5, NScheme::NTypeIds::Uint64> {}; + + using TKey = TableKey; + using TColumns = TableColumns< + PathId, + TabletId, + PortionsCount, + HostName, + NodeId + >; + }; + }; bool MaybeSystemViewPath(const TVector& path); diff --git a/ydb/core/testlib/common_helper.cpp b/ydb/core/testlib/common_helper.cpp index 26841ce1af6b..d5267342d540 100644 --- a/ydb/core/testlib/common_helper.cpp +++ b/ydb/core/testlib/common_helper.cpp @@ -32,7 +32,7 @@ TLoggerInit::TLoggerInit(NKqp::TKikimrRunner& kikimr) void TLoggerInit::Initialize() { for (auto&& i : Services) { - for (auto&& s : i) { + for (auto&& s : i.second) { Runtime->SetLogPriority(s, Priority); } } @@ -60,7 +60,9 @@ void THelper::StartScanRequest(const TString& request, const bool expectSuccess, }); const TInstant start = TInstant::Now(); while (!resultReady && start + TDuration::Seconds(60) > TInstant::Now()) { + Cerr << "START_SLEEP" << Endl; Server.GetRuntime()->SimulateSleep(TDuration::Seconds(1)); + Cerr << "FINISHED_SLEEP" << Endl; if (scanIterator && !resultReady) { scanIterator->ReadNext().Subscribe([&](NThreading::TFuture streamPartFuture) { NYdb::NTable::TScanQueryPart streamPart = 
streamPartFuture.GetValueSync(); diff --git a/ydb/core/testlib/common_helper.h b/ydb/core/testlib/common_helper.h index 0160f8e0e8a1..76b3eaf67938 100644 --- a/ydb/core/testlib/common_helper.h +++ b/ydb/core/testlib/common_helper.h @@ -16,10 +16,12 @@ class TLoggerInit { private: NActors::TTestActorRuntime* Runtime; NActors::NLog::EPriority Priority = NActors::NLog::EPriority::PRI_DEBUG; - std::vector> Services = {KqpServices, CSServices}; + THashMap> Services; public: TLoggerInit(NActors::TTestActorRuntime* runtime) : Runtime(runtime) { + Services.emplace("KQP", KqpServices); + Services.emplace("CS", CSServices); } TLoggerInit(NActors::TTestActorRuntime& runtime) : Runtime(&runtime) { @@ -29,12 +31,20 @@ class TLoggerInit { ~TLoggerInit() { Initialize(); } - TLoggerInit& SetComponents(const std::vector services) { - Services = { services }; + TLoggerInit& Clear() { + Services.clear(); return *this; } - TLoggerInit& AddComponents(const std::vector services) { - Services.emplace_back(services); + TLoggerInit& SetComponents(const std::vector services, const TString& name) { + Services[name] = services; + return *this; + } + TLoggerInit& AddComponents(const std::vector services, const TString& name) { + AFL_VERIFY(Services.emplace(name, services).second); + return *this; + } + TLoggerInit& RemoveComponents(const TString& name) { + Services.erase(name); return *this; } TLoggerInit& SetPriority(const NActors::NLog::EPriority priority) { diff --git a/ydb/core/testlib/cs_helper.cpp b/ydb/core/testlib/cs_helper.cpp index 4853290f8637..65a1b1b87148 100644 --- a/ydb/core/testlib/cs_helper.cpp +++ b/ydb/core/testlib/cs_helper.cpp @@ -109,7 +109,7 @@ std::shared_ptr THelper::GetArrowSchema() const { std::vector> fields; fields.emplace_back(arrow::field("timestamp", arrow::timestamp(arrow::TimeUnit::TimeUnit::MICRO), false)); fields.emplace_back(arrow::field("resource_id", arrow::utf8())); - fields.emplace_back(arrow::field("uid", arrow::utf8())); + 
fields.emplace_back(arrow::field("uid", arrow::utf8(), false)); fields.emplace_back(arrow::field("level", arrow::int32())); fields.emplace_back(arrow::field("message", arrow::utf8())); if (GetWithJsonDocument()) { @@ -183,9 +183,9 @@ TString THelper::GetTestTableSchema() const { TStringBuilder sb; sb << R"(Columns{ Name: "timestamp" Type : "Timestamp" NotNull : true })"; sb << R"(Columns{ Name: "resource_id" Type : "Utf8" })"; - sb << R"(Columns{ Name: "uid" Type : "Utf8" })"; + sb << "Columns{ Name: \"uid\" Type : \"Utf8\" NotNull : true StorageId : \"" + OptionalStorageId + "\" }"; sb << R"(Columns{ Name: "level" Type : "Int32" })"; - sb << R"(Columns{ Name: "message" Type : "Utf8" })"; + sb << "Columns{ Name: \"message\" Type : \"Utf8\" StorageId : \"" + OptionalStorageId + "\" }"; if (GetWithJsonDocument()) { sb << R"(Columns{ Name: "json_payload" Type : "JsonDocument" })"; } @@ -425,11 +425,11 @@ std::shared_ptr TTableWithNullsHelper::TestArrowBatch(ui64, Y_ABORT_UNLESS(bResourceId.AppendNull().ok()); Y_ABORT_UNLESS(bLevel.Append(i).ok()); Y_ABORT_UNLESS(bBinaryStr.AppendNull().ok()); - Y_ABORT_UNLESS(bJsonVal.Append(std::string(R"({"col1": "val1", "obj": {"obj_col2_int": 16}})")).ok()); + Y_ABORT_UNLESS(bJsonVal.Append(std::string(R"({"col1": "val1", "col-abc": "val-abc", "obj": {"obj_col2_int": 16}})")).ok()); Y_ABORT_UNLESS(bJsonDoc.AppendNull().ok()); } - const auto maybeJsonDoc = std::string(R"({"col1": "val1", "obj": {"obj_col2_int": 16}})"); + const auto maybeJsonDoc = std::string(R"({"col1": "val1", "col-abc": "val-abc", "obj": {"obj_col2_int": 16}})"); for (size_t i = rowCount / 2 + 1; i <= rowCount; ++i) { Y_ABORT_UNLESS(bId.Append(i).ok()); Y_ABORT_UNLESS(bResourceId.Append(std::to_string(i)).ok()); diff --git a/ydb/core/testlib/cs_helper.h b/ydb/core/testlib/cs_helper.h index 8714eb0727d4..7a9e3dad1bf3 100644 --- a/ydb/core/testlib/cs_helper.h +++ b/ydb/core/testlib/cs_helper.h @@ -26,6 +26,7 @@ class THelper: public THelperSchemaless { 
std::shared_ptr GetArrowSchema() const; YDB_FLAG_ACCESSOR(WithJsonDocument, false); + YDB_ACCESSOR(TString, OptionalStorageId, "__MEMORY"); TString ShardingMethod = "HASH_FUNCTION_CONSISTENCY_64"; bool WithSomeNulls_ = false; protected: diff --git a/ydb/core/tx/columnshard/background_controller.cpp b/ydb/core/tx/columnshard/background_controller.cpp index 99010cbaa203..542251c2c25a 100644 --- a/ydb/core/tx/columnshard/background_controller.cpp +++ b/ydb/core/tx/columnshard/background_controller.cpp @@ -3,37 +3,11 @@ namespace NKikimr::NColumnShard { -void TBackgroundController::StartTtl(const NOlap::TColumnEngineChanges& changes) { - Y_ABORT_UNLESS(TtlPortions.empty()); - TtlPortions = changes.GetTouchedPortions(); -} - -bool TBackgroundController::StartCompaction(const NOlap::TPlanCompactionInfo& info, const NOlap::TColumnEngineChanges& changes) { +bool TBackgroundController::StartCompaction(const NOlap::TPlanCompactionInfo& info) { Y_ABORT_UNLESS(ActiveCompactionInfo.emplace(info.GetPathId(), info).second); - Y_ABORT_UNLESS(CompactionInfoPortions.emplace(info.GetPathId(), changes.GetTouchedPortions()).second); return true; } -THashSet TBackgroundController::GetConflictTTLPortions() const { - THashSet result = TtlPortions; - for (auto&& i : CompactionInfoPortions) { - for (auto&& g : i.second) { - Y_ABORT_UNLESS(result.emplace(g).second); - } - } - return result; -} - -THashSet TBackgroundController::GetConflictCompactionPortions() const { - THashSet result = TtlPortions; - for (auto&& i : CompactionInfoPortions) { - for (auto&& g : i.second) { - Y_ABORT_UNLESS(result.emplace(g).second); - } - } - return result; -} - void TBackgroundController::CheckDeadlines() { for (auto&& i : ActiveCompactionInfo) { if (TMonotonic::Now() - i.second.GetStartTime() > NOlap::TCompactionLimits::CompactionTimeout) { @@ -73,13 +47,4 @@ TString TBackgroundController::DebugStringIndexation() const { return sb; } -TString TBackgroundActivity::DebugString() const { - return 
TStringBuilder() - << "indexation:" << HasIndexation() << ";" - << "compaction:" << HasCompaction() << ";" - << "cleanup:" << HasCleanup() << ";" - << "ttl:" << HasTtl() << ";" - ; -} - } diff --git a/ydb/core/tx/columnshard/background_controller.h b/ydb/core/tx/columnshard/background_controller.h index fad724213dd5..b88e1be3bcea 100644 --- a/ydb/core/tx/columnshard/background_controller.h +++ b/ydb/core/tx/columnshard/background_controller.h @@ -8,52 +8,15 @@ class TColumnEngineChanges; namespace NKikimr::NColumnShard { -class TBackgroundActivity { -public: - enum EBackActivity : ui32 { - NONE = 0x00, - INDEX = 0x01, - COMPACT = 0x02, - CLEAN = 0x04, - TTL = 0x08, - ALL = 0xffff - }; - - static TBackgroundActivity Indexation() { return TBackgroundActivity(INDEX); } - static TBackgroundActivity Compaction() { return TBackgroundActivity(COMPACT); } - static TBackgroundActivity Cleanup() { return TBackgroundActivity(CLEAN); } - static TBackgroundActivity Ttl() { return TBackgroundActivity(TTL); } - static TBackgroundActivity All() { return TBackgroundActivity(ALL); } - static TBackgroundActivity None() { return TBackgroundActivity(NONE); } - - TBackgroundActivity() = default; - - bool HasIndexation() const { return Activity & INDEX; } - bool HasCompaction() const { return Activity & COMPACT; } - bool HasCleanup() const { return Activity & CLEAN; } - bool HasTtl() const { return Activity & TTL; } - bool HasAll() const { return Activity == ALL; } - - TString DebugString() const; - -private: - EBackActivity Activity = NONE; - - TBackgroundActivity(EBackActivity activity) - : Activity(activity) - {} -}; - class TBackgroundController { private: THashMap ActiveIndexationTasks; using TCurrentCompaction = THashMap; TCurrentCompaction ActiveCompactionInfo; - THashMap> CompactionInfoPortions; - bool ActiveCleanup = false; - THashSet TtlPortions; + bool ActiveCleanupPortions = false; + bool ActiveCleanupTables = false; YDB_READONLY(TMonotonic, LastIndexationInstant, 
TMonotonic::Zero()); public: THashSet GetConflictTTLPortions() const; @@ -62,10 +25,9 @@ class TBackgroundController { void CheckDeadlines(); void CheckDeadlinesIndexation(); - bool StartCompaction(const NOlap::TPlanCompactionInfo& info, const NOlap::TColumnEngineChanges& changes); + bool StartCompaction(const NOlap::TPlanCompactionInfo& info); void FinishCompaction(const NOlap::TPlanCompactionInfo& info) { Y_ABORT_UNLESS(ActiveCompactionInfo.erase(info.GetPathId())); - Y_ABORT_UNLESS(CompactionInfoPortions.erase(info.GetPathId())); } const TCurrentCompaction& GetActiveCompaction() const { return ActiveCompactionInfo; @@ -81,25 +43,28 @@ class TBackgroundController { return ActiveIndexationTasks.size(); } - void StartCleanup() { - Y_ABORT_UNLESS(!ActiveCleanup); - ActiveCleanup = true; + void StartCleanupPortions() { + Y_ABORT_UNLESS(!ActiveCleanupPortions); + ActiveCleanupPortions = true; } - void FinishCleanup() { - Y_ABORT_UNLESS(ActiveCleanup); - ActiveCleanup = false; + void FinishCleanupPortions() { + Y_ABORT_UNLESS(ActiveCleanupPortions); + ActiveCleanupPortions = false; } - bool IsCleanupActive() const { - return ActiveCleanup; + bool IsCleanupPortionsActive() const { + return ActiveCleanupPortions; } - void StartTtl(const NOlap::TColumnEngineChanges& changes); - void FinishTtl() { - Y_ABORT_UNLESS(!TtlPortions.empty()); - TtlPortions.clear(); + void StartCleanupTables() { + Y_ABORT_UNLESS(!ActiveCleanupTables); + ActiveCleanupTables = true; + } + void FinishCleanupTables() { + Y_ABORT_UNLESS(ActiveCleanupTables); + ActiveCleanupTables = false; } - bool IsTtlActive() const { - return !TtlPortions.empty(); + bool IsCleanupTablesActive() const { + return ActiveCleanupTables; } }; diff --git a/ydb/core/tx/columnshard/blob.cpp b/ydb/core/tx/columnshard/blob.cpp index 0a7a3ecbdb04..ec92f317f225 100644 --- a/ydb/core/tx/columnshard/blob.cpp +++ b/ydb/core/tx/columnshard/blob.cpp @@ -1,187 +1 @@ #include "blob.h" -#include "defs.h" - -#include - -namespace 
NKikimr::NOlap { - -// Format: "S3-f(logoBlobId)-group" -// Example: "S3-42-72075186224038245_51_31595_2_0_11952_0-2181038103" -TString DsIdToS3Key(const TUnifiedBlobId& dsid, const ui64 pathId) { - TString blobId = dsid.GetLogoBlobId().ToString(); - for (auto&& c : blobId) { - switch (c) { - case ':': - c = '_'; - break; - case '[': - case ']': - c = '-'; - } - } - TString result = - "S3-" + - ::ToString(pathId) + - blobId + - ::ToString(dsid.GetDsGroup()) - ; - return result; -} - -TUnifiedBlobId S3KeyToDsId(const TString& s, TString& error, ui64& pathId) { - TVector keyBucket; - Split(s, "-", keyBucket); - - ui32 dsGroup; - if (keyBucket.size() != 4 || keyBucket[0] != "S3" - || !TryFromString(keyBucket[3], dsGroup) - || !TryFromString(keyBucket[1], pathId)) - { - error = TStringBuilder() << "Wrong S3 key '" << s << "'"; - return TUnifiedBlobId(); - } - - TString blobId = "[" + keyBucket[2] + "]"; - for (size_t i = 0; i < blobId.size(); ++i) { - switch (blobId[i]) { - case '_': - blobId[i] = ':'; - break; - } - } - - TLogoBlobID logoBlobId; - if (!TLogoBlobID::Parse(logoBlobId, blobId, error)) { - return TUnifiedBlobId(); - } - - return TUnifiedBlobId(dsGroup, logoBlobId); -} - -namespace { - -#define PARSE_INT_COMPONENT(fieldType, fieldName, endChar) \ - if (pos >= endPos) { \ - error = "Failed to parse " #fieldName " component"; \ - return TUnifiedBlobId(); \ - } \ - fieldType fieldName = -1; \ - { \ - auto [ptr, ec] { std::from_chars(str + pos, str + endPos, fieldName) }; \ - if (ec != std::errc()) { \ - error = "Failed to parse " #fieldName " component"; \ - return TUnifiedBlobId(); \ - } else { \ - pos = ptr - str; \ - } \ - if (str[pos++] != endChar) { \ - error = #endChar " not found after " #fieldName; \ - return TUnifiedBlobId(); \ - } \ - } - -// Format: "DS:group:logoBlobId" -// Example: "DS:2181038103:[72075186224038245:51:31595:2:0:11952:0]" -TUnifiedBlobId ParseExtendedDsBlobId(const TString& s, TString& error) { - Y_ABORT_UNLESS(s.size() > 2); - 
const char* str = s.c_str(); - Y_ABORT_UNLESS(str[0] == 'D' && str[1] == 'S'); - i64 pos = 2; - i64 endPos = s.size(); - if (str[pos++] != ':') { - error = "Starting ':' not found"; - return TUnifiedBlobId(); - } - - PARSE_INT_COMPONENT(ui32, dsGroup, ':'); - - TLogoBlobID logoBlobId; - if (!TLogoBlobID::Parse(logoBlobId, s.substr(pos), error)) { - return TUnifiedBlobId(); - } - - return TUnifiedBlobId(dsGroup, logoBlobId); -} - -// Format: "SM[tabletId:generation:step:cookie:size]" -// Example: "SM[72075186224038245:51:31184:0:2528]" -TUnifiedBlobId ParseSmallBlobId(const TString& s, TString& error) { - Y_ABORT_UNLESS(s.size() > 2); - const char* str = s.c_str(); - Y_ABORT_UNLESS(str[0] == 'S' && str[1] == 'M'); - i64 pos = 2; - i64 endPos = s.size(); - if (str[pos++] != '[') { - error = "opening [ not found"; - return TUnifiedBlobId(); - } - - PARSE_INT_COMPONENT(ui64, tabletId, ':'); - PARSE_INT_COMPONENT(ui32, gen, ':'); - PARSE_INT_COMPONENT(ui32, step, ':'); - PARSE_INT_COMPONENT(ui32, cookie, ':'); - PARSE_INT_COMPONENT(ui32, size, ']'); - - if (pos != endPos) { - error = "Extra characters after closing ]"; - return TUnifiedBlobId(); - } - - return TUnifiedBlobId(tabletId, gen, step, cookie, size); -} - -// Format: "s = S3_key" -TUnifiedBlobId ParseS3BlobId(const TString& s, TString& error) { - ui64 pathId; - TUnifiedBlobId dsBlobId = S3KeyToDsId(s, error, pathId); - if (!dsBlobId.IsValid()) { - return TUnifiedBlobId(); - } - - return TUnifiedBlobId(dsBlobId, TUnifiedBlobId::S3_BLOB, pathId); -} - -} - -TUnifiedBlobId TUnifiedBlobId::ParseFromString(const TString& str, - const IBlobGroupSelector* dsGroupSelector, TString& error) -{ - if (str.size() <= 2) { - error = TStringBuilder() << "Wrong blob id: '" << str << "'"; - return TUnifiedBlobId(); - } - - if (str[0] == '[') { - // If blobId starts with '[' this must be a logoblobId and if channel is set to FAKE_CHANNEL - // this is a fake logoblobid used for small blob - TLogoBlobID logoBlobId; - bool parsed = 
TLogoBlobID::Parse(logoBlobId, str, error); - if (!parsed) { - error = "Cannot parse TLogoBlobID: " + error; - return TUnifiedBlobId(); - } - if (logoBlobId.Channel() == TSmallBlobId::FAKE_CHANNEL) { - // Small blob - return TUnifiedBlobId(logoBlobId.TabletID(), logoBlobId.Generation(), logoBlobId.Step(), - logoBlobId.Cookie(), logoBlobId.BlobSize()); - } else { - // DS blob - if (!dsGroupSelector) { - error = "Need TBlobGroupSelector to resolve DS group for the blob"; - return TUnifiedBlobId(); - } - return TUnifiedBlobId(dsGroupSelector->GetGroup(logoBlobId), logoBlobId); - } - } else if (str[0] == 'D' && str[1] == 'S') { - return ParseExtendedDsBlobId(str, error); - } else if (str[0] == 'S' && str[1] == 'M') { - return ParseSmallBlobId(str, error); - } else if (str[0] == 'S' && str[1] == '3') { - return ParseS3BlobId(str, error); - } - - error = TStringBuilder() << "Wrong blob id: '" << str << "'"; - return TUnifiedBlobId(); -} - -} diff --git a/ydb/core/tx/columnshard/blob.h b/ydb/core/tx/columnshard/blob.h index 0c3cb27bdb3c..4a2dad81a540 100644 --- a/ydb/core/tx/columnshard/blob.h +++ b/ydb/core/tx/columnshard/blob.h @@ -1,448 +1,3 @@ #pragma once -#include - -#include - -namespace NKikimr::NOlap { - -class IBlobGroupSelector; -class TUnifiedBlobId; - -TString DsIdToS3Key(const TUnifiedBlobId& dsid, const ui64 pathId); -TUnifiedBlobId S3KeyToDsId(const TString& s, TString& error, ui64& pathId); - -// Encapsulates different types of blob ids to simplify dealing with blobs for the -// components that do not need to know where the blob is stored -// Blob id formats: -// * Old DS blob id: just "logoBlobId" e.g. "[72075186224038245:51:31595:2:0:11952:0]" -// * DS blob id: "DS:dsGroup:logoBlobId" e.g. "DS:2181038103:[72075186224038245:51:31595:2:0:11952:0]" -// * Small blob id: "SM[tabletId:generation:step:cookie:size]" e.g. 
"SM[72075186224038245:51:31184:0:2528]" -class TUnifiedBlobId { - struct TInvalid { - bool operator == (const TInvalid&) const { return true; } - }; - - // Id of a blob in YDB distributed storage - struct TDsBlobId { - TLogoBlobID BlobId; - ui32 DsGroup; - - bool operator == (const TDsBlobId& other) const { - return BlobId == other.BlobId && DsGroup == other.DsGroup; - } - - TString ToStringNew() const { - return Sprintf( "DS:%" PRIu32 ":%s", DsGroup, BlobId.ToString().c_str()); - } - - TString ToStringLegacy() const { - return BlobId.ToString(); - } - - ui64 Hash() const { - return CombineHashes(BlobId.Hash(), IntHash(DsGroup)); - } - }; - - // Id of a blob that is stored in Tablet local DB table - struct TSmallBlobId { - static constexpr ui8 FAKE_CHANNEL = 255; // Small blob id can be represented as - // a fake TLogoBlobID with channel = FAKE_CHANNEL - - ui64 TabletId; - ui32 Gen; - ui32 Step; - ui32 Cookie; - ui32 Size; - - bool operator == (const TSmallBlobId& other) const { - return TabletId == other.TabletId && - Gen == other.Gen && - Step == other.Step && - Cookie == other.Cookie && - Size == other.Size; - } - - TString ToStringNew() const { - return Sprintf( "SM[%" PRIu64 ":%" PRIu32 ":%" PRIu32 ":%" PRIu32 ":%" PRIu32 "]", - TabletId, Gen, Step, Cookie, Size); - } - - TString ToStringLegacy() const { - // For compatibility with preproduction version small blobs can also be - // addressed by fake TlogoBlobID with channel = 255 - return TLogoBlobID(TabletId, Gen, Step, FAKE_CHANNEL, Size, Cookie).ToString(); - } - - ui64 Hash() const { - ui64 hash = IntHash(TabletId); - hash = CombineHashes(hash, IntHash(Gen)); - hash = CombineHashes(hash, IntHash(Step)); - hash = CombineHashes(hash, IntHash(Cookie)); - hash = CombineHashes(hash, IntHash(Size)); - return hash; - } - }; - - struct TS3BlobId { - TDsBlobId DsBlobId; - TString Key; - - TS3BlobId() = default; - - TS3BlobId(const TUnifiedBlobId& dsBlob, const ui64 pathId) - { - Y_ABORT_UNLESS(dsBlob.IsDsBlob()); - 
DsBlobId = std::get(dsBlob.Id); - Key = DsIdToS3Key(dsBlob, pathId); - } - - bool operator == (const TS3BlobId& other) const { - return Key == other.Key; - } - - TString ToStringNew() const { - return Sprintf("%s", Key.c_str()); - } - - ui64 Hash() const { - return IntHash(THash()(Key)); - } - }; - - std::variant< - TInvalid, - TDsBlobId, - TSmallBlobId, - TS3BlobId - > Id; - -public: - enum EBlobType { - INVALID = 0, - DS_BLOB = 1, - TABLET_SMALL_BLOB = 2, - S3_BLOB = 3, - }; - - TUnifiedBlobId() - : Id(TInvalid()) - {} - - // Initialize as DS blob Id - TUnifiedBlobId(ui32 dsGroup, const TLogoBlobID& logoBlobId) - : Id(TDsBlobId{logoBlobId, dsGroup}) - {} - - // Initialize as Small blob Id - TUnifiedBlobId(ui64 tabletId, ui32 gen, ui32 step, ui32 cookie, ui32 size) - : Id(TSmallBlobId{tabletId, gen, step, cookie, size}) - {} - - // Make S3 blob Id from DS one - TUnifiedBlobId(const TUnifiedBlobId& blob, EBlobType type, const ui64 pathId) - : Id(TS3BlobId(blob, pathId)) - { - Y_ABORT_UNLESS(type == S3_BLOB); - } - - TUnifiedBlobId(const TUnifiedBlobId& other) = default; - TUnifiedBlobId& operator = (const TUnifiedBlobId& logoBlobId) = default; - TUnifiedBlobId(TUnifiedBlobId&& other) = default; - TUnifiedBlobId& operator = (TUnifiedBlobId&& logoBlobId) = default; - - TUnifiedBlobId MakeS3BlobId(ui64 pathId) const { - Y_ABORT_UNLESS(IsDsBlob()); - return TUnifiedBlobId(*this, TUnifiedBlobId::S3_BLOB, pathId); - } - - static TUnifiedBlobId ParseFromString(const TString& str, - const IBlobGroupSelector* dsGroupSelector, TString& error); - - bool operator == (const TUnifiedBlobId& other) const { - return Id == other.Id; - } - - EBlobType GetType() const { - return (EBlobType)Id.index(); - } - - bool IsValid() const { - return Id.index() != INVALID; - } - - size_t BlobSize() const { - switch (Id.index()) { - case DS_BLOB: - return std::get(Id).BlobId.BlobSize(); - case TABLET_SMALL_BLOB: - return std::get(Id).Size; - case S3_BLOB: - return 
std::get(Id).DsBlobId.BlobId.BlobSize(); - case INVALID: - Y_ABORT("Invalid blob id"); - } - Y_ABORT(); - } - - bool IsSmallBlob() const { - return GetType() == TABLET_SMALL_BLOB; - } - - bool IsDsBlob() const { - return GetType() == DS_BLOB; - } - - bool IsS3Blob() const { - return GetType() == S3_BLOB; - } - - TLogoBlobID GetLogoBlobId() const { - Y_ABORT_UNLESS(IsDsBlob()); - return std::get(Id).BlobId; - } - - ui32 GetDsGroup() const { - Y_ABORT_UNLESS(IsDsBlob()); - return std::get(Id).DsGroup; - } - - TString GetS3Key() const { - Y_ABORT_UNLESS(IsS3Blob()); - return std::get(Id).Key; - } - - ui64 GetTabletId() const { - switch (Id.index()) { - case DS_BLOB: - return std::get(Id).BlobId.TabletID(); - case TABLET_SMALL_BLOB: - return std::get(Id).TabletId; - case S3_BLOB: - return std::get(Id).DsBlobId.BlobId.TabletID(); - case INVALID: - Y_ABORT("Invalid blob id"); - } - Y_ABORT(); - } - - ui64 Hash() const noexcept { - switch (Id.index()) { - case INVALID: - return 0; - case DS_BLOB: - return std::get(Id).Hash(); - case TABLET_SMALL_BLOB: - return std::get(Id).Hash(); - case S3_BLOB: - return std::get(Id).Hash(); - } - Y_ABORT(); - } - - // This is only implemented for DS for backward compatibility with persisted data. 
- // All new functionality should rahter use string blob id representation - TString SerializeBinary() const { - Y_ABORT_UNLESS(IsDsBlob()); - return TString((const char*)GetLogoBlobId().GetRaw(), sizeof(TLogoBlobID)); - } - - TString ToStringLegacy() const { - switch (Id.index()) { - case DS_BLOB: - return std::get(Id).ToStringLegacy(); - case TABLET_SMALL_BLOB: - return std::get(Id).ToStringLegacy(); - case S3_BLOB: - Y_ABORT("Not implemented"); - case INVALID: - return ""; - } - Y_ABORT(); - } - - TString ToStringNew() const { - switch (Id.index()) { - case DS_BLOB: - return std::get(Id).ToStringNew(); - case TABLET_SMALL_BLOB: - return std::get(Id).ToStringNew(); - case S3_BLOB: - return std::get(Id).ToStringNew(); - case INVALID: - return ""; - } - Y_ABORT(); - } -}; - - -// Describes a range of bytes in a blob. It is used for read requests and for caching. -struct TBlobRange { - TUnifiedBlobId BlobId; - ui32 Offset; - ui32 Size; - - const TUnifiedBlobId& GetBlobId() const { - return BlobId; - } - - bool IsValid() const { - return BlobId.IsValid() && Size && Offset + Size <= BlobId.BlobSize(); - } - - ui32 GetBlobSize() const { - return Size; - } - - bool IsFullBlob() const { - return Size == BlobId.BlobSize(); - } - - explicit TBlobRange(const TUnifiedBlobId& blobId = TUnifiedBlobId(), ui32 offset = 0, ui32 size = 0) - : BlobId(blobId) - , Offset(offset) - , Size(size) - { - if (Size > 0) { - Y_ABORT_UNLESS(Offset < BlobId.BlobSize()); - Y_ABORT_UNLESS(Offset + Size <= BlobId.BlobSize()); - } - } - - static TBlobRange FromBlobId(const TUnifiedBlobId& blobId) { - return TBlobRange(blobId, 0, blobId.BlobSize()); - } - - bool operator == (const TBlobRange& other) const { - return - BlobId == other.BlobId && - Offset == other.Offset && - Size == other.Size; - } - - ui64 Hash() const noexcept { - ui64 hash = BlobId.Hash(); - hash = CombineHashes(hash, IntHash(Offset)); - hash = CombineHashes(hash, IntHash(Size)); - return hash; - } - - TString ToString() const { - 
return Sprintf("{ Blob: %s Offset: %" PRIu32 " Size: %" PRIu32 " }", - BlobId.ToStringNew().c_str(), Offset, Size); - } -}; - -class IBlobInUseTracker { -private: - virtual bool DoFreeBlob(const NOlap::TUnifiedBlobId& blobId) = 0; - virtual bool DoUseBlob(const NOlap::TUnifiedBlobId& blobId) = 0; -public: - virtual ~IBlobInUseTracker() = default; - - bool FreeBlob(const NOlap::TUnifiedBlobId& blobId) { - return DoFreeBlob(blobId); - } - bool UseBlob(const NOlap::TUnifiedBlobId& blobId) { - return DoUseBlob(blobId); - } - - virtual bool IsBlobInUsage(const NOlap::TUnifiedBlobId& blobId) const = 0; -}; - -// Expected blob lifecycle: EVICTING -> SELF_CACHED -> EXTERN <-> CACHED -enum class EEvictState : ui8 { - UNKNOWN = 0, - EVICTING = 1, // source, extern, cached blobs: 1-- - SELF_CACHED = 2, // source, extern, cached blobs: 11- - EXTERN = 3, // source, extern, cached blobs: -1- - CACHED = 4, // source, extern, cached blobs: -11 - ERASING = 5, // source, extern, cached blobs: -?? - //ERASED = 6, // source, extern, cached blobs: --- -}; - -inline bool IsExported(EEvictState state) { - return state == EEvictState::SELF_CACHED || - state == EEvictState::EXTERN || - state == EEvictState::CACHED; -} - -inline bool CouldBeExported(EEvictState state) { - return state == EEvictState::SELF_CACHED || - state == EEvictState::EXTERN || - state == EEvictState::CACHED || - state == EEvictState::ERASING; -} - -inline bool IsDeleted(EEvictState state) { - return ui8(state) >= ui8(EEvictState::EXTERN); // !EVICTING and !SELF_CACHED -} - -struct TEvictedBlob { - EEvictState State = EEvictState::UNKNOWN; - TUnifiedBlobId Blob; - TUnifiedBlobId ExternBlob; - TUnifiedBlobId CachedBlob; - - bool operator == (const TEvictedBlob& other) const { - return Blob == other.Blob; - } - - ui64 Hash() const noexcept { - return Blob.Hash(); - } - - bool IsEvicting() const { - return State == EEvictState::EVICTING; - } - - bool IsExternal() const { - if (State == EEvictState::EXTERN) { - 
Y_ABORT_UNLESS(ExternBlob.IsValid()); - return true; - } - return false; - } - - TString ToString() const { - return TStringBuilder() << "state: " << (ui32)State - << " blob: " << Blob.ToStringNew() - << " extern: " << ExternBlob.ToStringNew() - << " cached: " << CachedBlob.ToStringNew(); - } -}; - -} - -inline -IOutputStream& operator <<(IOutputStream& out, const NKikimr::NOlap::TUnifiedBlobId& blobId) { - return out << blobId.ToStringNew(); -} - -inline -IOutputStream& operator <<(IOutputStream& out, const NKikimr::NOlap::TBlobRange& blobRange) { - return out << blobRange.ToString(); -} - -template<> -struct ::THash { - inline ui64 operator()(const NKikimr::NOlap::TUnifiedBlobId& a) const { - return a.Hash(); - } -}; - -template <> -struct THash { - inline size_t operator() (const NKikimr::NOlap::TBlobRange& key) const { - return key.Hash(); - } -}; - -template <> -struct THash { - inline size_t operator() (const NKikimr::NOlap::TEvictedBlob& key) const { - return key.Hash(); - } -}; +#include "common/blob.h" diff --git a/ydb/core/tx/columnshard/blob_cache.cpp b/ydb/core/tx/columnshard/blob_cache.cpp index 123fefbbacea..0287b876d509 100644 --- a/ydb/core/tx/columnshard/blob_cache.cpp +++ b/ydb/core/tx/columnshard/blob_cache.cpp @@ -62,18 +62,8 @@ class TBlobCache: public TActorBootstrapped { // (e.g. 
DS blobs from the same tablet residing on the same DS group, or 2 small blobs from the same tablet) std::tuple BlobSource() const { const TUnifiedBlobId& blobId = BlobRange.BlobId; - Y_ABORT_UNLESS(blobId.IsValid()); - - if (blobId.IsDsBlob()) { - // Tablet & group restriction - return {blobId.GetTabletId(), blobId.GetDsGroup(), ReadVariant()}; - } else if (blobId.IsSmallBlob()) { - // Tablet restriction, no group restrictions - return {blobId.GetTabletId(), 0, ReadVariant()}; - } - - return {0, 0, EReadVariant::FAST}; + return {blobId.GetTabletId(), blobId.GetDsGroup(), ReadVariant()}; } }; @@ -409,7 +399,6 @@ class TBlobCache: public TActorBootstrapped { ui64 requestSize = 0; ui32 dsGroup = std::get<1>(target); TReadItem::EReadVariant readVariant = std::get<2>(target); - Y_ABORT_UNLESS(rangesGroup.begin()->BlobId.IsDsBlob()); std::vector dsReads; diff --git a/ydb/core/tx/columnshard/blob_cache.h b/ydb/core/tx/columnshard/blob_cache.h index d65dc6aa6242..75e0ccf0a677 100644 --- a/ydb/core/tx/columnshard/blob_cache.h +++ b/ydb/core/tx/columnshard/blob_cache.h @@ -74,12 +74,14 @@ struct TEvBlobCache { TString Data; const bool FromCache = false; const TInstant ConstructTime = Now(); + const TString DataSourceId; - TEvReadBlobRangeResult(const TBlobRange& blobRange, NKikimrProto::EReplyStatus status, const TString& data, const bool fromCache = false) + TEvReadBlobRangeResult(const TBlobRange& blobRange, NKikimrProto::EReplyStatus status, const TString& data, const bool fromCache = false, const TString& dataSourceId = Default()) : BlobRange(blobRange) , Status(status) , Data(data) , FromCache(fromCache) + , DataSourceId(dataSourceId) {} }; diff --git a/ydb/core/tx/columnshard/blobs_action/abstract/action.cpp b/ydb/core/tx/columnshard/blobs_action/abstract/action.cpp index fdd3fd5adba1..f1d2e4e051ee 100644 --- a/ydb/core/tx/columnshard/blobs_action/abstract/action.cpp +++ b/ydb/core/tx/columnshard/blobs_action/abstract/action.cpp @@ -3,16 +3,4 @@ namespace 
NKikimr::NOlap { -std::shared_ptr TBlobsAction::GetWriting(const TPortionInfo& portionInfo) { - return GetStorageAction(portionInfo.GetBlobsStorage()->GetStorageId()).GetWriting(ConsumerId); -} - -std::shared_ptr TBlobsAction::GetReading(const TPortionInfo& portionInfo) { - return GetStorageAction(portionInfo.GetBlobsStorage()->GetStorageId()).GetReading(ConsumerId); -} - -std::shared_ptr TBlobsAction::GetRemoving(const TPortionInfo& portionInfo) { - return GetStorageAction(portionInfo.GetBlobsStorage()->GetStorageId()).GetRemoving(ConsumerId); -} - } diff --git a/ydb/core/tx/columnshard/blobs_action/abstract/action.h b/ydb/core/tx/columnshard/blobs_action/abstract/action.h index 6f10c41d9169..90d5959e51c4 100644 --- a/ydb/core/tx/columnshard/blobs_action/abstract/action.h +++ b/ydb/core/tx/columnshard/blobs_action/abstract/action.h @@ -22,13 +22,13 @@ class TStorageAction { } - const std::shared_ptr& GetRemoving(const TString& consumerId) { + const std::shared_ptr& GetRemoving(const NBlobOperations::EConsumer consumerId) { if (!Removing) { Removing = Storage->StartDeclareRemovingAction(consumerId); } return Removing; } - const std::shared_ptr& GetWriting(const TString& consumerId) { + const std::shared_ptr& GetWriting(const NBlobOperations::EConsumer consumerId) { if (!Writing) { Writing = Storage->StartWritingAction(consumerId); } @@ -37,7 +37,7 @@ class TStorageAction { const std::shared_ptr& GetWritingOptional() const { return Writing; } - const std::shared_ptr& GetReading(const TString& consumerId) { + const std::shared_ptr& GetReading(const NBlobOperations::EConsumer consumerId) { if (!Reading) { Reading = Storage->StartReadingAction(consumerId); } @@ -55,9 +55,9 @@ class TStorageAction { return !!Writing; } - void OnExecuteTxAfterAction(NColumnShard::TColumnShard& self, NColumnShard::TBlobManagerDb& dbBlobs, const bool blobsWroteSuccessfully) { + void OnExecuteTxAfterAction(NColumnShard::TColumnShard& self, TBlobManagerDb& dbBlobs, const bool 
blobsWroteSuccessfully) { if (Removing) { - Removing->OnExecuteTxAfterRemoving(self, dbBlobs, blobsWroteSuccessfully); + Removing->OnExecuteTxAfterRemoving(dbBlobs, blobsWroteSuccessfully); } if (Writing) { Writing->OnExecuteTxAfterWrite(self, dbBlobs, blobsWroteSuccessfully); @@ -66,7 +66,7 @@ class TStorageAction { void OnCompleteTxAfterAction(NColumnShard::TColumnShard& self, const bool blobsWroteSuccessfully) { if (Removing) { - Removing->OnCompleteTxAfterRemoving(self, blobsWroteSuccessfully); + Removing->OnCompleteTxAfterRemoving(blobsWroteSuccessfully); } if (Writing) { Writing->OnCompleteTxAfterWrite(self, blobsWroteSuccessfully); @@ -78,7 +78,7 @@ class TBlobsAction { private: std::shared_ptr Storages; THashMap StorageActions; - const TString ConsumerId; + const NBlobOperations::EConsumer ConsumerId; TStorageAction& GetStorageAction(const TString& storageId) { auto it = StorageActions.find(storageId); @@ -88,13 +88,21 @@ class TBlobsAction { return it->second; } public: - explicit TBlobsAction(std::shared_ptr storages, const TString& consumerId) + explicit TBlobsAction(std::shared_ptr storages, const NBlobOperations::EConsumer consumerId) : Storages(storages) , ConsumerId(consumerId) { } + TString GetStorageIds() const { + TStringBuilder sb; + for (auto&& i : StorageActions) { + sb << i.first << ","; + } + return sb; + } + ui32 GetWritingBlobsCount() const { ui32 result = 0; for (auto&& [_, action] : StorageActions) { @@ -135,16 +143,22 @@ class TBlobsAction { return result; } - bool NeedDraftWritingTransaction() const { + [[nodiscard]] TConclusion NeedDraftWritingTransaction() const { + bool hasWriting = false; for (auto&& i : GetWritingActions()) { if (i->NeedDraftTransaction()) { return true; } + hasWriting = true; + } + if (hasWriting) { + return false; + } else { + return TConclusionStatus::Fail("has not writings"); } - return false; } - void OnExecuteTxAfterAction(NColumnShard::TColumnShard& self, NColumnShard::TBlobManagerDb& dbBlobs, const bool 
blobsWroteSuccessfully) { + void OnExecuteTxAfterAction(NColumnShard::TColumnShard& self, TBlobManagerDb& dbBlobs, const bool blobsWroteSuccessfully) { for (auto&& i : StorageActions) { i.second.OnExecuteTxAfterAction(self, dbBlobs, blobsWroteSuccessfully); } @@ -160,20 +174,14 @@ class TBlobsAction { return GetStorageAction(storageId).GetRemoving(ConsumerId); } - std::shared_ptr GetRemoving(const TPortionInfo& portionInfo); - std::shared_ptr GetWriting(const TString& storageId) { return GetStorageAction(storageId).GetWriting(ConsumerId); } - std::shared_ptr GetWriting(const TPortionInfo& portionInfo); - std::shared_ptr GetReading(const TString& storageId) { return GetStorageAction(storageId).GetReading(ConsumerId); } - std::shared_ptr GetReading(const TPortionInfo& portionInfo); - }; } diff --git a/ydb/core/tx/columnshard/blobs_action/abstract/blob_set.cpp b/ydb/core/tx/columnshard/blobs_action/abstract/blob_set.cpp new file mode 100644 index 000000000000..b0952746b9ba --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/abstract/blob_set.cpp @@ -0,0 +1,57 @@ +#include "blob_set.h" + +#include + +#include + +namespace NKikimr::NOlap { + +NKikimrColumnShardBlobOperationsProto::TTabletByBlob TTabletByBlob::SerializeToProto() const { + NKikimrColumnShardBlobOperationsProto::TTabletByBlob result; + for (auto&& i : Data) { + auto* blobsProto = result.AddBlobs(); + blobsProto->SetBlobId(i.first.ToStringNew()); + blobsProto->SetTabletId((ui64)i.second); + } + return result; +} + +NKikimr::TConclusionStatus TTabletByBlob::DeserializeFromProto(const NKikimrColumnShardBlobOperationsProto::TTabletByBlob& proto) { + for (auto&& i : proto.GetBlobs()) { + auto parse = TUnifiedBlobId::BuildFromString(i.GetBlobId(), nullptr); + if (!parse) { + return parse; + } + AFL_VERIFY(Data.emplace(*parse, (TTabletId)i.GetTabletId()).second); + } + return TConclusionStatus::Success(); +} + +NKikimrColumnShardBlobOperationsProto::TTabletsByBlob TTabletsByBlob::SerializeToProto() const { + 
NKikimrColumnShardBlobOperationsProto::TTabletsByBlob result; + for (auto&& i : Data) { + auto* blobsProto = result.AddBlobs(); + blobsProto->SetBlobId(i.first.ToStringNew()); + for (auto&& t : i.second) { + blobsProto->AddTabletIds((ui64)t); + } + } + return result; +} + +NKikimr::TConclusionStatus TTabletsByBlob::DeserializeFromProto(const NKikimrColumnShardBlobOperationsProto::TTabletsByBlob& proto) { + for (auto&& i : proto.GetBlobs()) { + auto parse = TUnifiedBlobId::BuildFromString(i.GetBlobId(), nullptr); + if (!parse) { + return parse; + } + auto it = Data.emplace(*parse, THashSet()).first; + for (auto&& t : i.GetTabletIds()) { + AFL_VERIFY(it->second.emplace((TTabletId)t).second); + ++Size; + } + } + return TConclusionStatus::Success(); +} + +} diff --git a/ydb/core/tx/columnshard/blobs_action/abstract/blob_set.h b/ydb/core/tx/columnshard/blobs_action/abstract/blob_set.h new file mode 100644 index 000000000000..7692dad22eb2 --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/abstract/blob_set.h @@ -0,0 +1,512 @@ +#pragma once +#include +#include + +#include +#include +#include + +#include +#include +#include +#include + +namespace NKikimrColumnShardBlobOperationsProto { +class TTabletByBlob; +class TTabletsByBlob; +} + +namespace NKikimr::NOlap { + +class TTabletByBlob { +private: + THashMap Data; +public: + NKikimrColumnShardBlobOperationsProto::TTabletByBlob SerializeToProto() const; + + TConclusionStatus DeserializeFromProto(const NKikimrColumnShardBlobOperationsProto::TTabletByBlob& proto); + + THashMap::const_iterator begin() const { + return Data.begin(); + } + + THashMap::const_iterator end() const { + return Data.end(); + } + + const THashMap* operator->() const { + return &Data; + } + + THashMap* operator->() { + return &Data; + } + +}; + +class TTabletsByBlob { +private: + THashMap> Data; + i32 Size = 0; +public: + ui32 GetSize() const { + return Size; + } + + void Clear() { + Data.clear(); + Size = 0; + } + + 
NKikimrColumnShardBlobOperationsProto::TTabletsByBlob SerializeToProto() const; + + TConclusionStatus DeserializeFromProto(const NKikimrColumnShardBlobOperationsProto::TTabletsByBlob& proto); + + THashMap>::const_iterator begin() const { + return Data.begin(); + } + + THashMap>::const_iterator end() const { + return Data.end(); + } + + bool Contains(const TTabletId tabletId, const TUnifiedBlobId& blobId) const { + auto it = Data.find(blobId); + if (it == Data.end()) { + return false; + } + return it->second.contains(tabletId); + } + + const THashSet* Find(const TUnifiedBlobId& blobId) const { + auto it = Data.find(blobId); + if (it == Data.end()) { + return nullptr; + } + return &it->second; + } + + bool IsEmpty() const { + return Data.empty(); + } + + template + TTabletsByBlob ExtractBlobs(const TFilter& filter, const std::optional countLimit = {}) { + TTabletsByBlob result; + THashSet idsRemove; + ui32 count = 0; + for (auto&& i : Data) { + if (filter(i.first, i.second)) { + idsRemove.emplace(i.first); + Size -= i.second.size(); + result.Data.emplace(i.first, i.second); + result.Size += i.second.size(); + count += i.second.size(); + if (countLimit && count >= *countLimit) { + break; + } + } + } + for (auto&& i : idsRemove) { + Data.erase(i); + } + return result; + } + + class TIterator { + private: + const TTabletsByBlob& Owner; + THashMap>::const_iterator BlobsIterator; + THashSet::const_iterator TabletsIterator; + public: + TIterator(const TTabletsByBlob& owner) + : Owner(owner) { + BlobsIterator = Owner.Data.begin(); + if (BlobsIterator != Owner.Data.end()) { + TabletsIterator = BlobsIterator->second.begin(); + } + } + + const TUnifiedBlobId& GetBlobId() const { + AFL_VERIFY(IsValid()); + return BlobsIterator->first; + } + + TTabletId GetTabletId() const { + AFL_VERIFY(IsValid()); + return *TabletsIterator; + } + + bool IsValid() const { + return BlobsIterator != Owner.Data.end() && TabletsIterator != BlobsIterator->second.end(); + } + + void operator++() { + 
AFL_VERIFY(IsValid()); + ++TabletsIterator; + if (TabletsIterator == BlobsIterator->second.end()) { + ++BlobsIterator; + if (BlobsIterator != Owner.Data.end()) { + TabletsIterator = BlobsIterator->second.begin(); + } + } + } + }; + + TIterator GetIterator() const { + return TIterator(*this); + } + + bool ExtractFront(TTabletId& tabletId, TUnifiedBlobId& blobId) { + if (Data.empty()) { + return false; + } + auto& b = Data.begin()->second; + AFL_VERIFY(b.size()); + tabletId = *b.begin(); + blobId = Data.begin()->first; + b.erase(b.begin()); + if (b.empty()) { + Data.erase(Data.begin()); + } + AFL_VERIFY(--Size >= 0); + return true; + } + + bool ExtractFrontTo(TTabletsByBlob& dest) { + TTabletId tabletId; + TUnifiedBlobId blobId; + if (!ExtractFront(tabletId, blobId)) { + return false; + } + AFL_VERIFY(dest.Add(tabletId, blobId)); + return true; + } + + bool ExtractBlobTo(const TUnifiedBlobId& blobId, TTabletsByBlob& dest) { + auto it = Data.find(blobId); + if (it == Data.end()) { + return false; + } + AFL_VERIFY(dest.Add(blobId, it->second)); + Size -= it->second.size(); + AFL_VERIFY(Size >= 0); + Data.erase(it); + return true; + } + + bool Add(const TTabletId tabletId, const TUnifiedBlobId& blobId) { + THashSet hashSet = {blobId}; + return Add(tabletId, hashSet); + } + + bool Add(const TTabletsByBlob& blobs) { + bool uniqueOnly = true; + for (auto&& i : blobs.Data) { + if (!Add(i.first, i.second)) { + uniqueOnly = false; + } + } + return uniqueOnly; + } + + bool Add(const TTabletId tabletId, const THashSet& blobIds) { + bool hasSkipped = false; + for (auto&& i : blobIds) { + auto it = Data.find(i); + if (it == Data.end()) { + THashSet tabletsLocal = {tabletId}; + it = Data.emplace(i, tabletsLocal).first; + Size += 1; + } else { + if (!it->second.emplace(tabletId).second) { + hasSkipped = true; + } else { + Size += 1; + } + } + } + return !hasSkipped; + } + + bool Add(const TUnifiedBlobId& blobId, const THashSet& tabletIds) { + bool hasSkipped = false; + if 
(tabletIds.empty()) { + return true; + } + auto& hashSet = Data[blobId]; + for (auto&& i : tabletIds) { + if (!hashSet.emplace(i).second) { + hasSkipped = true; + } else { + Size += 1; + } + } + return !hasSkipped; + } + + bool Remove(const TTabletId tabletId, const TUnifiedBlobId& blobId) { + auto it = Data.find(blobId); + if (it == Data.end()) { + return false; + } + auto itTablet = it->second.find(tabletId); + if (itTablet == it->second.end()) { + return false; + } + it->second.erase(itTablet); + AFL_VERIFY(--Size >= 0); + if (it->second.empty()) { + Data.erase(it); + } + return true; + } +}; + +class TBlobsByTablet { +private: + THashMap> Data; +public: + class TIterator { + private: + const TBlobsByTablet* Owner; + THashMap>::const_iterator TabletsIterator; + THashSet::const_iterator BlobsIterator; + public: + TIterator(const TBlobsByTablet& owner) + : Owner(&owner) + { + TabletsIterator = Owner->Data.begin(); + if (TabletsIterator != Owner->Data.end()) { + BlobsIterator = TabletsIterator->second.begin(); + } + } + + const TUnifiedBlobId& GetBlobId() const { + AFL_VERIFY(IsValid()); + return *BlobsIterator; + } + + TTabletId GetTabletId() const { + AFL_VERIFY(IsValid()); + return TabletsIterator->first; + } + + bool IsValid() const { + return TabletsIterator != Owner->Data.end() && BlobsIterator != TabletsIterator->second.end(); + } + + void operator++() { + AFL_VERIFY(IsValid()); + ++BlobsIterator; + if (BlobsIterator == TabletsIterator->second.end()) { + ++TabletsIterator; + if (TabletsIterator != Owner->Data.end()) { + BlobsIterator = TabletsIterator->second.begin(); + } + } + } + }; + + TIterator GetIterator() const { + return TIterator(*this); + } + + THashMap>::const_iterator begin() const { + return Data.begin(); + } + + THashMap>::const_iterator end() const { + return Data.end(); + } + + void GetFront(const ui32 count, TBlobsByTablet& result) const { + TBlobsByTablet resultLocal; + ui32 countReady = 0; + for (auto&& i : Data) { + for (auto&& b : 
i.second) { + if (countReady >= count) { + std::swap(result, resultLocal); + return; + } + resultLocal.Add(i.first, b); + ++countReady; + } + } + std::swap(result, resultLocal); + } + + const THashSet* Find(const TTabletId tabletId) const { + auto it = Data.find(tabletId); + if (it == Data.end()) { + return nullptr; + } + return &it->second; + } + + void ExtractFront(const ui32 count, TBlobsByTablet* result) { + TBlobsByTablet resultLocal; + ui32 countLocal = 0; + while (Data.size()) { + auto& t = *Data.begin(); + while (t.second.size()) { + auto& b = *t.second.begin(); + if (result && countLocal >= count) { + std::swap(*result, resultLocal); + return; + } + if (countLocal >= count) { + return; + } + ++countLocal; + if (result) { + resultLocal.Add(t.first, b); + } + t.second.erase(t.second.begin()); + } + if (t.second.empty()) { + Data.erase(Data.begin()); + } + } + if (result) { + std::swap(*result, resultLocal); + } + } + + bool Add(const TTabletId tabletId, const TUnifiedBlobId& blobId) { + auto it = Data.find(tabletId); + if (it == Data.end()) { + THashSet hashSet = {blobId}; + Data.emplace(tabletId, std::move(hashSet)); + return true; + } else { + return it->second.emplace(blobId).second; + } + } + + bool IsEmpty() const { + return Data.empty(); + } + + bool Remove(const TTabletId tabletId, const TUnifiedBlobId& blobId) { + auto it = Data.find(tabletId); + if (it == Data.end()) { + return false; + } else { + const bool result = it->second.erase(blobId); + if (result && it->second.empty()) { + Data.erase(it); + } + return result; + } + } + + bool Remove(const TTabletId tabletId) { + auto it = Data.find(tabletId); + if (it == Data.end()) { + return false; + } else { + Data.erase(it); + return true; + } + } +}; + +class TBlobsCategories { +private: + TTabletId SelfTabletId; + YDB_READONLY_DEF(TBlobsByTablet, Sharing); + YDB_ACCESSOR_DEF(TBlobsByTablet, Direct); + YDB_READONLY_DEF(TBlobsByTablet, Borrowed); +public: + bool IsEmpty() const { + return 
Sharing.IsEmpty() && Direct.IsEmpty() && Borrowed.IsEmpty(); + } + + class TIterator { + private: + const TBlobsCategories* Owner; + std::optional Sharing; + std::optional Direct; + std::optional Borrowed; + TBlobsByTablet::TIterator* CurrentIterator = nullptr; + + void SwitchIterator() { + CurrentIterator = nullptr; + if (Sharing && Sharing->IsValid()) { + CurrentIterator = &*Sharing; + } else { + Sharing.reset(); + } + if (Direct && Direct->IsValid()) { + CurrentIterator = &*Direct; + } else { + Direct.reset(); + } + if (Borrowed && Borrowed->IsValid()) { + CurrentIterator = &*Borrowed; + } else { + Borrowed.reset(); + } + } + + public: + TIterator(const TBlobsCategories& owner) + : Owner(&owner) + { + Sharing = Owner->Sharing.GetIterator(); + Direct = Owner->Direct.GetIterator(); + Borrowed = Owner->Borrowed.GetIterator(); + SwitchIterator(); + } + + const TUnifiedBlobId& GetBlobId() const { + AFL_VERIFY(IsValid()); + return CurrentIterator->GetBlobId(); + } + + TTabletId GetTabletId() const { + AFL_VERIFY(IsValid()); + return CurrentIterator->GetTabletId(); + } + + bool IsValid() const { + return CurrentIterator && CurrentIterator->IsValid(); + } + + void operator++() { + AFL_VERIFY(IsValid()); + ++*CurrentIterator; + if (!CurrentIterator->IsValid()) { + SwitchIterator(); + } + } + }; + + TIterator GetIterator() const { + return TIterator(*this); + } + + void AddDirect(const TTabletId tabletId, const TUnifiedBlobId& id) { + AFL_VERIFY(Direct.Add(tabletId, id)); + } + void AddBorrowed(const TTabletId tabletId, const TUnifiedBlobId& id) { + AFL_VERIFY(Borrowed.Add(tabletId, id)); + } + void AddSharing(const TTabletId tabletId, const TUnifiedBlobId& id) { + AFL_VERIFY(Sharing.Add(tabletId, id)); + } + void RemoveSharing(const TTabletId tabletId, const TUnifiedBlobId& id) { + Y_UNUSED(Sharing.Remove(tabletId, id)); + } + void RemoveBorrowed(const TTabletId tabletId, const TUnifiedBlobId& id) { + Y_UNUSED(Borrowed.Remove(tabletId, id)); + } + TBlobsCategories(const 
TTabletId selfTabletId) + : SelfTabletId(selfTabletId) + { + Y_UNUSED(SelfTabletId); + } +}; + +} diff --git a/ydb/core/tx/columnshard/blobs_action/abstract/common.h b/ydb/core/tx/columnshard/blobs_action/abstract/common.h index 80ec48f7c952..8aa3881fec99 100644 --- a/ydb/core/tx/columnshard/blobs_action/abstract/common.h +++ b/ydb/core/tx/columnshard/blobs_action/abstract/common.h @@ -1,9 +1,11 @@ #pragma once -#include #include -#include +#include + #include #include +#include +#include namespace NKikimr::NOlap { diff --git a/ydb/core/tx/columnshard/blobs_action/abstract/gc.cpp b/ydb/core/tx/columnshard/blobs_action/abstract/gc.cpp index dc1783692635..816ac9a8dfd2 100644 --- a/ydb/core/tx/columnshard/blobs_action/abstract/gc.cpp +++ b/ydb/core/tx/columnshard/blobs_action/abstract/gc.cpp @@ -1,20 +1,34 @@ #include "gc.h" #include +#include namespace NKikimr::NOlap { void IBlobsGCAction::OnCompleteTxAfterCleaning(NColumnShard::TColumnShard& self, const std::shared_ptr& taskAction) { if (!AbortedFlag) { + NActors::TLogContextGuard logGuard = NActors::TLogContextBuilder::Build()("tablet_id", self.TabletID()); AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "OnCompleteTxAfterCleaning")("action_guid", GetActionGuid()); + auto storage = self.GetStoragesManager()->GetOperator(GetStorageId()); + storage->GetSharedBlobs()->OnTransactionCompleteAfterCleaning(BlobsToRemove); + for (auto i = BlobsToRemove.GetIterator(); i.IsValid(); ++i) { + Counters->OnReply(i.GetBlobId().BlobSize()); + } if (!DoOnCompleteTxAfterCleaning(self, taskAction)) { return; } - taskAction->OnFinished(); + OnFinished(); + NYDBTest::TControllers::GetColumnShardController()->OnAfterGCAction(self, *taskAction); } } -void IBlobsGCAction::OnExecuteTxAfterCleaning(NColumnShard::TColumnShard& self, NColumnShard::TBlobManagerDb& dbBlobs) { +void IBlobsGCAction::OnExecuteTxAfterCleaning(NColumnShard::TColumnShard& self, TBlobManagerDb& dbBlobs) { if (!AbortedFlag) { + const NActors::TLogContextGuard 
logGuard = NActors::TLogContextBuilder::Build()("tablet_id", self.TabletID()); + auto storage = self.GetStoragesManager()->GetOperator(GetStorageId()); + storage->GetSharedBlobs()->OnTransactionExecuteAfterCleaning(BlobsToRemove, dbBlobs.GetDatabase()); + for (auto i = BlobsToRemove.GetIterator(); i.IsValid(); ++i) { + RemoveBlobIdFromDB(i.GetTabletId(), i.GetBlobId(), dbBlobs); + } AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "OnExecuteTxAfterCleaning")("action_guid", GetActionGuid()); return DoOnExecuteTxAfterCleaning(self, dbBlobs); } diff --git a/ydb/core/tx/columnshard/blobs_action/abstract/gc.h b/ydb/core/tx/columnshard/blobs_action/abstract/gc.h index e310b9c4e935..ad381063e79e 100644 --- a/ydb/core/tx/columnshard/blobs_action/abstract/gc.h +++ b/ydb/core/tx/columnshard/blobs_action/abstract/gc.h @@ -1,35 +1,56 @@ #pragma once #include "common.h" #include +#include +#include +#include +#include namespace NKikimr::NColumnShard { class TColumnShard; -class TBlobManagerDb; } namespace NKikimr::NOlap { +class TBlobManagerDb; class IBlobsGCAction: public ICommonBlobsAction { private: using TBase = ICommonBlobsAction; +protected: + TBlobsCategories BlobsToRemove; + std::shared_ptr Counters; protected: bool AbortedFlag = false; bool FinishedFlag = false; - virtual void DoOnExecuteTxAfterCleaning(NColumnShard::TColumnShard& self, NColumnShard::TBlobManagerDb& dbBlobs) = 0; + virtual void DoOnExecuteTxAfterCleaning(NColumnShard::TColumnShard& self, TBlobManagerDb& dbBlobs) = 0; virtual bool DoOnCompleteTxAfterCleaning(NColumnShard::TColumnShard& self, const std::shared_ptr& taskAction) = 0; + virtual void RemoveBlobIdFromDB(const TTabletId tabletId, const TUnifiedBlobId& blobId, TBlobManagerDb& dbBlobs) = 0; + virtual bool DoIsEmpty() const = 0; public: - void OnExecuteTxAfterCleaning(NColumnShard::TColumnShard& self, NColumnShard::TBlobManagerDb& dbBlobs); + void OnExecuteTxAfterCleaning(NColumnShard::TColumnShard& self, TBlobManagerDb& dbBlobs); void 
OnCompleteTxAfterCleaning(NColumnShard::TColumnShard& self, const std::shared_ptr& taskAction); + const TBlobsCategories& GetBlobsToRemove() const { + return BlobsToRemove; + } + + bool IsEmpty() const { + return BlobsToRemove.IsEmpty() && DoIsEmpty(); + } + bool IsInProgress() const { return !AbortedFlag && !FinishedFlag; } - IBlobsGCAction(const TString& storageId) + IBlobsGCAction(const TString& storageId, TBlobsCategories&& blobsToRemove, const std::shared_ptr& counters) : TBase(storageId) + , BlobsToRemove(std::move(blobsToRemove)) + , Counters(counters) { - + for (auto i = BlobsToRemove.GetIterator(); i.IsValid(); ++i) { + Counters->OnRequest(i.GetBlobId().BlobSize()); + } } void Abort(); diff --git a/ydb/core/tx/columnshard/blobs_action/abstract/gc_actor.cpp b/ydb/core/tx/columnshard/blobs_action/abstract/gc_actor.cpp new file mode 100644 index 000000000000..7877709b4558 --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/abstract/gc_actor.cpp @@ -0,0 +1,5 @@ +#include "gc_actor.h" + +namespace NKikimr::NOlap::NBlobOperations::NBlobStorage { + +} diff --git a/ydb/core/tx/columnshard/blobs_action/abstract/gc_actor.h b/ydb/core/tx/columnshard/blobs_action/abstract/gc_actor.h new file mode 100644 index 000000000000..009dee623caf --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/abstract/gc_actor.h @@ -0,0 +1,72 @@ +#pragma once +#include "blob_set.h" +#include "common.h" + +#include + +#include +#include + +namespace NKikimr::NOlap::NBlobOperations { + +template +class TSharedBlobsCollectionActor: public TActorBootstrapped { +private: + using TBase = TActorBootstrapped; + const TString OperatorId; + TBlobsByTablet BlobIdsByTablets; + const TTabletId SelfTabletId; + virtual void DoOnSharedRemovingFinished() = 0; + void OnSharedRemovingFinished() { + SharedRemovingFinished = true; + DoOnSharedRemovingFinished(); + } + void Handle(NEvents::TEvDeleteSharedBlobsFinished::TPtr& ev) { + 
AFL_VERIFY(BlobIdsByTablets.Remove((TTabletId)ev->Get()->Record.GetTabletId())); + if (BlobIdsByTablets.IsEmpty()) { + AFL_VERIFY(!SharedRemovingFinished); + OnSharedRemovingFinished(); + } + } + void Handle(NActors::TEvents::TEvUndelivered::TPtr& ev) { + auto* blobIds = BlobIdsByTablets.Find((TTabletId)ev->Cookie); + AFL_VERIFY(blobIds); + auto evResend = std::make_unique(TBase::SelfId(), ev->Cookie, OperatorId, *blobIds); + NActors::TActivationContext::AsActorContext().Send(MakePipePeNodeCacheID(false), + new TEvPipeCache::TEvForward(evResend.release(), ev->Cookie, true), IEventHandle::FlagTrackDelivery, ev->Cookie); + } +protected: + bool SharedRemovingFinished = false; +public: + TSharedBlobsCollectionActor(const TString& operatorId, const TTabletId selfTabletId, const TBlobsByTablet& blobIds) + : OperatorId(operatorId) + , BlobIdsByTablets(blobIds) + , SelfTabletId(selfTabletId) + { + + } + + STFUNC(StateWork) { + switch (ev->GetTypeRewrite()) { + hFunc(NEvents::TEvDeleteSharedBlobsFinished, Handle); + hFunc(NActors::TEvents::TEvUndelivered, Handle); + default: + AFL_VERIFY(false)("problem", "unexpected event")("event_type", ev->GetTypeName()); + } + } + + void Bootstrap(const TActorContext& /*ctx*/) { + if (BlobIdsByTablets.IsEmpty()) { + OnSharedRemovingFinished(); + } else { + for (auto&& i : BlobIdsByTablets) { + auto ev = std::make_unique(TBase::SelfId(), (ui64)SelfTabletId, OperatorId, i.second); + NActors::TActivationContext::AsActorContext().Send(MakePipePeNodeCacheID(false), + new TEvPipeCache::TEvForward(ev.release(), (ui64)i.first, true), IEventHandle::FlagTrackDelivery, (ui64)i.first); + } + } + } + +}; + +} diff --git a/ydb/core/tx/columnshard/blobs_action/abstract/read.cpp b/ydb/core/tx/columnshard/blobs_action/abstract/read.cpp index 766dd21da79d..baba7ec7b6c9 100644 --- a/ydb/core/tx/columnshard/blobs_action/abstract/read.cpp +++ b/ydb/core/tx/columnshard/blobs_action/abstract/read.cpp @@ -1,82 +1,96 @@ #include "read.h" #include +#include 
namespace NKikimr::NOlap { -void IBlobsReadingAction::StartReading(THashMap>&& ranges) { +void IBlobsReadingAction::StartReading(std::vector&& ranges) { AFL_VERIFY(ranges.size()); AFL_VERIFY(Counters); for (auto&& i : ranges) { - AFL_VERIFY(i.second.size()); - for (auto&& br : i.second) { - Counters->OnRequest(br.Size); - } + Counters->OnRequest(i.Size); } - return DoStartReading(ranges); -} - -void IBlobsReadingAction::ExtractBlobsDataTo(THashMap& result) { - AFL_VERIFY(Started); - if (result.empty()) { - std::swap(result, Replies); - } else { - for (auto&& i : Replies) { - AFL_VERIFY(result.emplace(i.first, std::move(i.second)).second); - } - Replies.clear(); + THashSet result; + Groups = GroupBlobsForOptimization(std::move(ranges)); + for (auto&& [range, _] :Groups) { + result.emplace(range); } - RangesForResult.clear(); + return DoStartReading(std::move(result)); } void IBlobsReadingAction::Start(const THashSet& rangesInProgress) { Y_ABORT_UNLESS(!Started); + Started = true; + Y_ABORT_UNLESS(RangesForRead.size() + RangesForResult.size()); StartWaitingRanges = TMonotonic::Now(); - for (auto&& i : RangesForRead) { - WaitingRangesCount += i.second.size(); - } - THashMap> rangesFiltered; + WaitingRangesCount = RangesForRead.size(); + std::vector rangesFiltered; if (rangesInProgress.empty()) { - rangesFiltered = RangesForRead; + rangesFiltered.insert(rangesFiltered.end(), RangesForRead.begin(), RangesForRead.end()); } else { - for (auto&& i : RangesForRead) { - for (auto&& r : i.second) { - if (!rangesInProgress.contains(r)) { - rangesFiltered[r.BlobId].emplace(r); - } + for (auto&& r : RangesForRead) { + if (!rangesInProgress.contains(r)) { + rangesFiltered.emplace_back(r); } } } if (rangesFiltered.size()) { StartReading(std::move(rangesFiltered)); } - Started = true; for (auto&& i : RangesForResult) { + AFL_VERIFY(i.second.size() == i.first.Size); AFL_VERIFY(Replies.emplace(i.first, i.second).second); } } void IBlobsReadingAction::OnReadResult(const TBlobRange& 
range, const TString& data) { + auto it = Groups.find(range); + AFL_VERIFY(it != Groups.end()); AFL_VERIFY(Counters); - AFL_VERIFY(--WaitingRangesCount >= 0); + WaitingRangesCount -= it->second.size(); + AFL_VERIFY(WaitingRangesCount >= 0); Counters->OnReply(range.Size, TMonotonic::Now() - StartWaitingRanges); - Replies.emplace(range, data); + AFL_VERIFY(data.size() == range.Size); + for (auto&& i : it->second) { + AFL_VERIFY(i.Offset + i.GetBlobSize() <= range.Offset + data.size()); + AFL_VERIFY(range.Offset <= i.Offset); + Replies.emplace(i, data.substr(i.Offset - range.Offset, i.GetBlobSize())); + } + Groups.erase(it); } void IBlobsReadingAction::OnReadError(const TBlobRange& range, const TErrorStatus& replyStatus) { + auto it = Groups.find(range); + AFL_VERIFY(it != Groups.end()); + AFL_VERIFY(Counters); - AFL_VERIFY(--WaitingRangesCount >= 0); + WaitingRangesCount -= it->second.size(); + AFL_VERIFY(WaitingRangesCount >= 0); Counters->OnFail(range.Size, TMonotonic::Now() - StartWaitingRanges); - Fails.emplace(range, replyStatus); + for (auto&& i : it->second) { + Fails.emplace(i, replyStatus); + } + Groups.erase(it); } -void IBlobsReadingAction::AddRange(const TBlobRange& range, const TString& result /*= Default()*/) { +void IBlobsReadingAction::AddRange(const TBlobRange& range, const std::optional& result /*= {}*/) { Y_ABORT_UNLESS(!Started); if (!result) { - AFL_VERIFY(RangesForRead[range.BlobId].emplace(range).second)("range", range.ToString()); + AFL_VERIFY(!RangesForResult.contains(range)); + AFL_VERIFY(RangesForRead.emplace(range).second)("range", range.ToString()); } else { - AFL_VERIFY(RangesForResult.emplace(range, result).second)("range", range.ToString()); + AFL_VERIFY(result->size() == range.Size); + AFL_VERIFY(RangesForResult.emplace(range, *result).second)("range", range.ToString()); + } +} + +TString TActionReadBlobs::DebugString() const { + THashSet ranges; + for (auto&& i : Blobs) { + ranges.emplace(i.first); } + return JoinSeq(",", ranges); } 
} diff --git a/ydb/core/tx/columnshard/blobs_action/abstract/read.h b/ydb/core/tx/columnshard/blobs_action/abstract/read.h index 06a04c13c521..2a916305f87a 100644 --- a/ydb/core/tx/columnshard/blobs_action/abstract/read.h +++ b/ydb/core/tx/columnshard/blobs_action/abstract/read.h @@ -8,26 +8,175 @@ namespace NKikimr::NOlap { +class TActionReadBlobs { +private: + THashMap Blobs; +public: + TString DebugString() const; + + TActionReadBlobs() = default; + + TActionReadBlobs(THashMap&& blobs) + : Blobs(std::move(blobs)) + { + for (auto&& i : Blobs) { + AFL_VERIFY(i.second.size()); + } + } + + void Merge(TActionReadBlobs&& item) { + for (auto&& i : item.Blobs) { + Add(i.first, std::move(i.second)); + } + } + + THashMap::iterator begin() { + return Blobs.begin(); + } + + THashMap::iterator end() { + return Blobs.end(); + } + + ui64 GetTotalBlobsSize() const { + ui64 result = 0; + for (auto&& i : Blobs) { + result += i.second.size(); + } + return result; + } + + void Add(THashMap&& blobs) { + for (auto&& i : blobs) { + AFL_VERIFY(i.second.size()); + AFL_VERIFY(Blobs.emplace(i.first, std::move(i.second)).second); + } + } + + void Add(const TBlobRange& range, TString&& data) { + AFL_VERIFY(data.size()); + AFL_VERIFY(Blobs.emplace(range, std::move(data)).second); + } + + TString Extract(const TBlobRange& bRange) { + auto it = Blobs.find(bRange); + AFL_VERIFY(it != Blobs.end()); + TString result = it->second; + Blobs.erase(it); + return result; + } + + bool IsEmpty() const { + return Blobs.empty(); + } +}; + +class TBlobsGlueing { +public: + class TSequentialGluePolicy { + public: + bool Glue(TBlobRange& currentRange, const TBlobRange& addRange) const { + return currentRange.TryGlueWithNext(addRange); + } + }; + + class TBlobGluePolicy { + private: + const ui64 BlobLimitSize = 8LLU << 20; + public: + TBlobGluePolicy(const ui64 blobLimitSize) + : BlobLimitSize(blobLimitSize) + { + } + + bool Glue(TBlobRange& currentRange, const TBlobRange& addRange) const { + return 
currentRange.TryGlueSameBlob(addRange, BlobLimitSize); + } + }; + + template + static THashMap> GroupRanges(std::vector&& ranges, const TGluePolicy& policy) { + std::sort(ranges.begin(), ranges.end()); + THashMap> result; + std::optional currentRange; + std::vector currentList; + for (auto&& br : ranges) { + if (!currentRange) { + currentRange = br; + } + else if (!policy.Glue(*currentRange, br)) { + result.emplace(*currentRange, std::move(currentList)); + currentRange = br; + currentList.clear(); + } + currentList.emplace_back(br); + } + if (currentRange) { + result.emplace(*currentRange, std::move(currentList)); + } + return result; + } + +}; + class IBlobsReadingAction: public ICommonBlobsAction { public: using TErrorStatus = TConclusionSpecialStatus; private: using TBase = ICommonBlobsAction; - THashMap> RangesForRead; + THashSet RangesForRead; THashMap RangesForResult; TMonotonic StartWaitingRanges; i32 WaitingRangesCount = 0; THashMap Replies; THashMap Fails; + THashMap> Groups; std::shared_ptr Counters; bool Started = false; + bool DataExtracted = false; YDB_ACCESSOR(bool, IsBackgroundProcess, true); protected: - virtual void DoStartReading(const THashMap>& range) = 0; - void StartReading(THashMap>&& ranges); + virtual void DoStartReading(THashSet&& range) = 0; + void StartReading(std::vector&& ranges); + virtual THashMap> GroupBlobsForOptimization(std::vector&& ranges) const = 0; public: + const THashMap>& GetGroups() const { + return Groups; + } + + void Merge(const std::shared_ptr& action) { + AFL_VERIFY(action); + AFL_VERIFY(!Started); + for (auto&& i : action->RangesForResult) { + RangesForResult.emplace(i.first, i.second); + auto it = RangesForRead.find(i.first); + if (it != RangesForRead.end()) { + RangesForRead.erase(it); + } + } + for (auto&& i : action->RangesForResult) { + RangesForResult.emplace(i.first, i.second); + } + for (auto&& i : action->RangesForRead) { + if (!RangesForResult.contains(i)) { + RangesForRead.emplace(i); + } + } + } + + 
TActionReadBlobs ExtractBlobsData() { + AFL_VERIFY(Started); + AFL_VERIFY(IsFinished()); + AFL_VERIFY(!DataExtracted); + DataExtracted = true; + auto result = TActionReadBlobs(std::move(Replies)); + RangesForResult.clear(); + Replies.clear(); + return result; + } + void SetCounters(std::shared_ptr counters) { Counters = counters; } @@ -38,14 +187,10 @@ class IBlobsReadingAction: public ICommonBlobsAction { } - void ExtractBlobsDataTo(THashMap& result); - ui64 GetExpectedBlobsSize() const { ui64 result = 0; for (auto&& i : RangesForRead) { - for (auto&& b : i.second) { - result += b.Size; - } + result += i.Size; } for (auto&& i : RangesForResult) { result += i.first.Size; @@ -54,29 +199,10 @@ class IBlobsReadingAction: public ICommonBlobsAction { } ui64 GetExpectedBlobsCount() const { - ui64 result = 0; - for (auto&& i : RangesForRead) { - result += i.second.size(); - } - return result + RangesForResult.size(); + return RangesForRead.size() + RangesForResult.size(); } - void FillExpectedRanges(THashSet& ranges) const { - for (auto&& i : RangesForRead) { - for (auto&& b : i.second) { - Y_ABORT_UNLESS(ranges.emplace(b).second); - } - } - for (auto&& i : RangesForResult) { - Y_ABORT_UNLESS(ranges.emplace(i.first).second); - } - } - - const THashMap>& GetRangesForRead() const { - return RangesForRead; - } - - void AddRange(const TBlobRange& range, const TString& result = Default()); + void AddRange(const TBlobRange& range, const std::optional& result = {}); void Start(const THashSet& rangesInProgress); void OnReadResult(const TBlobRange& range, const TString& data); @@ -91,4 +217,46 @@ class IBlobsReadingAction: public ICommonBlobsAction { } }; +class TReadActionsCollection { +private: + THashMap> Actions; +public: + THashMap>::const_iterator begin() const { + return Actions.begin(); + } + + THashMap>::const_iterator end() const { + return Actions.end(); + } + + THashMap>::iterator begin() { + return Actions.begin(); + } + + THashMap>::iterator end() { + return 
Actions.end(); + } + + ui32 IsEmpty() const { + return Actions.empty(); + } + + void Add(const std::shared_ptr& action) { + auto it = Actions.find(action->GetStorageId()); + if (it == Actions.end()) { + Actions.emplace(action->GetStorageId(), action); + } else { + it->second->Merge(action); + } + } + + TReadActionsCollection() = default; + + TReadActionsCollection(const std::vector>& actions) { + for (auto&& a: actions) { + Add(a); + } + } +}; + } diff --git a/ydb/core/tx/columnshard/blobs_action/abstract/remove.cpp b/ydb/core/tx/columnshard/blobs_action/abstract/remove.cpp index f15292cdb3a6..3296333ebcc5 100644 --- a/ydb/core/tx/columnshard/blobs_action/abstract/remove.cpp +++ b/ydb/core/tx/columnshard/blobs_action/abstract/remove.cpp @@ -3,12 +3,16 @@ namespace NKikimr::NOlap { -void IBlobsDeclareRemovingAction::DeclareRemove(const TUnifiedBlobId& blobId) { - if (DeclaredBlobs.emplace(blobId).second) { - ACFL_DEBUG("event", "DeclareRemove")("blob_id", blobId); +void IBlobsDeclareRemovingAction::DeclareRemove(const TTabletId tabletId, const TUnifiedBlobId& blobId) { + if (DeclaredBlobs.Add(tabletId, blobId)) { + ACFL_DEBUG("event", "DeclareRemove")("blob_id", blobId)("tablet_id", (ui64)tabletId); Counters->OnRequest(blobId.BlobSize()); - return DoDeclareRemove(blobId); + return DoDeclareRemove(tabletId, blobId); } } +void IBlobsDeclareRemovingAction::DeclareSelfRemove(const TUnifiedBlobId& blobId) { + DeclareRemove(SelfTabletId, blobId); +} + } diff --git a/ydb/core/tx/columnshard/blobs_action/abstract/remove.h b/ydb/core/tx/columnshard/blobs_action/abstract/remove.h index 2b68a3753c02..e48ae7c2bb54 100644 --- a/ydb/core/tx/columnshard/blobs_action/abstract/remove.h +++ b/ydb/core/tx/columnshard/blobs_action/abstract/remove.h @@ -1,4 +1,5 @@ #pragma once +#include "blob_set.h" #include "common.h" #include #include @@ -7,37 +8,37 @@ namespace NKikimr::NColumnShard { class TColumnShard; -class TBlobManagerDb; } namespace NKikimr::NOlap { +class TBlobManagerDb; 
class IBlobsDeclareRemovingAction: public ICommonBlobsAction { private: + const TTabletId SelfTabletId; using TBase = ICommonBlobsAction; std::shared_ptr Counters; - YDB_READONLY_DEF(THashSet, DeclaredBlobs); + YDB_READONLY_DEF(TTabletsByBlob, DeclaredBlobs); protected: - virtual void DoDeclareRemove(const TUnifiedBlobId& blobId) = 0; - virtual void DoOnExecuteTxAfterRemoving(NColumnShard::TColumnShard& self, NColumnShard::TBlobManagerDb& dbBlobs, const bool blobsWroteSuccessfully) = 0; - virtual void DoOnCompleteTxAfterRemoving(NColumnShard::TColumnShard& self, const bool blobsWroteSuccessfully) = 0; + virtual void DoDeclareRemove(const TTabletId tabletId, const TUnifiedBlobId& blobId) = 0; + virtual void DoOnExecuteTxAfterRemoving(TBlobManagerDb& dbBlobs, const bool blobsWroteSuccessfully) = 0; + virtual void DoOnCompleteTxAfterRemoving(const bool blobsWroteSuccessfully) = 0; public: - IBlobsDeclareRemovingAction(const TString& storageId) + IBlobsDeclareRemovingAction(const TString& storageId, const TTabletId& selfTabletId, const std::shared_ptr& counters) : TBase(storageId) + , SelfTabletId(selfTabletId) + , Counters(counters) { } - void SetCounters(const std::shared_ptr& counters) { - Counters = counters; - } - - void DeclareRemove(const TUnifiedBlobId& blobId); - void OnExecuteTxAfterRemoving(NColumnShard::TColumnShard& self, NColumnShard::TBlobManagerDb& dbBlobs, const bool blobsWroteSuccessfully) { - return DoOnExecuteTxAfterRemoving(self, dbBlobs, blobsWroteSuccessfully); + void DeclareRemove(const TTabletId tabletId, const TUnifiedBlobId& blobId); + void DeclareSelfRemove(const TUnifiedBlobId& blobId); + void OnExecuteTxAfterRemoving(TBlobManagerDb& dbBlobs, const bool blobsWroteSuccessfully) { + return DoOnExecuteTxAfterRemoving(dbBlobs, blobsWroteSuccessfully); } - void OnCompleteTxAfterRemoving(NColumnShard::TColumnShard& self, const bool blobsWroteSuccessfully) { - return DoOnCompleteTxAfterRemoving(self, blobsWroteSuccessfully); + void 
OnCompleteTxAfterRemoving(const bool blobsWroteSuccessfully) { + return DoOnCompleteTxAfterRemoving(blobsWroteSuccessfully); } }; diff --git a/ydb/core/tx/columnshard/blobs_action/abstract/storage.h b/ydb/core/tx/columnshard/blobs_action/abstract/storage.h index b0a565ee55bf..d99e949e308b 100644 --- a/ydb/core/tx/columnshard/blobs_action/abstract/storage.h +++ b/ydb/core/tx/columnshard/blobs_action/abstract/storage.h @@ -7,11 +7,10 @@ #include #include #include -#include +#include -namespace NKikimr::NColumnShard { -class TTiersManager; -} +#include +#include namespace NKikimr::NOlap { @@ -29,21 +28,25 @@ class TCommonBlobsTracker: public IBlobInUseTracker { class IBlobsStorageOperator { private: + YDB_READONLY_DEF(TTabletId, SelfTabletId); YDB_READONLY_DEF(TString, StorageId); - std::shared_ptr CurrentGCAction; YDB_READONLY(bool, Stopped, false); std::shared_ptr Counters; + YDB_ACCESSOR_DEF(std::shared_ptr, SharedBlobs); protected: - virtual std::shared_ptr DoStartDeclareRemovingAction() = 0; + virtual std::shared_ptr DoStartDeclareRemovingAction(const std::shared_ptr& counters) = 0; virtual std::shared_ptr DoStartWritingAction() = 0; virtual std::shared_ptr DoStartReadingAction() = 0; - virtual bool DoLoad(NColumnShard::IBlobManagerDb& dbBlobs) = 0; + virtual bool DoLoad(IBlobManagerDb& dbBlobs) = 0; virtual bool DoStop() { return true; } + virtual const NSplitter::TSplitSettings& DoGetBlobSplitSettings() const { + return Default(); + } - virtual void DoOnTieringModified(const std::shared_ptr& tiers) = 0; + virtual void DoOnTieringModified(const std::shared_ptr& tiers) = 0; virtual TString DoDebugString() const { return ""; } @@ -54,14 +57,21 @@ class IBlobsStorageOperator { } public: - IBlobsStorageOperator(const TString& storageId) - : StorageId(storageId) + IBlobsStorageOperator(const TString& storageId, const std::shared_ptr& sharedBlobs) + : SelfTabletId(sharedBlobs->GetSelfTabletId()) + , StorageId(storageId) + , SharedBlobs(sharedBlobs) { Counters = 
std::make_shared(storageId); } void Stop(); + const NSplitter::TSplitSettings& GetBlobSplitSettings() const { + return DoGetBlobSplitSettings(); + } + + virtual TTabletsByBlob GetBlobsToDelete() const = 0; virtual std::shared_ptr GetBlobsTracker() const = 0; virtual ~IBlobsStorageOperator() = default; @@ -70,36 +80,38 @@ class IBlobsStorageOperator { return TStringBuilder() << "(storage_id=" << StorageId << ";details=(" << DoDebugString() << "))"; } - bool Load(NColumnShard::IBlobManagerDb& dbBlobs) { + bool Load(IBlobManagerDb& dbBlobs) { return DoLoad(dbBlobs); } - void OnTieringModified(const std::shared_ptr& tiers) { + void OnTieringModified(const std::shared_ptr& tiers) { + AFL_VERIFY(tiers); return DoOnTieringModified(tiers); } - std::shared_ptr StartDeclareRemovingAction(const TString& consumerId) { - auto result = DoStartDeclareRemovingAction(); - result->SetCounters(Counters->GetConsumerCounter(consumerId)->GetRemoveDeclareCounters()); - return result; + std::shared_ptr StartDeclareRemovingAction(const NBlobOperations::EConsumer consumerId) { + return DoStartDeclareRemovingAction(Counters->GetConsumerCounter(consumerId)->GetRemoveDeclareCounters()); } - std::shared_ptr StartWritingAction(const TString& consumerId) { + std::shared_ptr StartWritingAction(const NBlobOperations::EConsumer consumerId) { auto result = DoStartWritingAction(); result->SetCounters(Counters->GetConsumerCounter(consumerId)->GetWriteCounters()); return result; } - std::shared_ptr StartReadingAction(const TString& consumerId) { + std::shared_ptr StartReadingAction(const NBlobOperations::EConsumer consumerId) { auto result = DoStartReadingAction(); result->SetCounters(Counters->GetConsumerCounter(consumerId)->GetReadCounters()); return result; } bool StartGC() { + NActors::TLogContextGuard gLogging = NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("storage_id", GetStorageId())("tablet_id", GetSelfTabletId()); if (CurrentGCAction && CurrentGCAction->IsInProgress()) { 
+ AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "gc_in_progress"); return false; } if (Stopped) { + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "stopped_on_gc"); return false; } - auto task = StartGCAction(Counters->GetConsumerCounter("GC")->GetRemoveGCCounters()); + auto task = StartGCAction(Counters->GetConsumerCounter(NBlobOperations::EConsumer::GC)->GetRemoveGCCounters()); if (!task) { return false; } diff --git a/ydb/core/tx/columnshard/blobs_action/abstract/storages_manager.cpp b/ydb/core/tx/columnshard/blobs_action/abstract/storages_manager.cpp index 88b3ca44473c..8b80dd9be277 100644 --- a/ydb/core/tx/columnshard/blobs_action/abstract/storages_manager.cpp +++ b/ydb/core/tx/columnshard/blobs_action/abstract/storages_manager.cpp @@ -4,7 +4,25 @@ namespace NKikimr::NOlap { -std::shared_ptr IStoragesManager::GetOperator(const TString& storageId) { +std::shared_ptr IStoragesManager::GetOperatorOptional(const TString& storageId) const { + AFL_VERIFY(Initialized); + AFL_VERIFY(storageId); + TReadGuard rg(RWMutex); + auto it = Constructed.find(storageId); + if (it != Constructed.end()) { + return it->second; + } else { + return nullptr; + } +} + +std::shared_ptr IStoragesManager::GetOperatorVerified(const TString& storageId) const { + auto result = GetOperatorOptional(storageId); + AFL_VERIFY(result)("storage_id", storageId); + return result; +} + +std::shared_ptr IStoragesManager::GetOperatorGuarantee(const TString& storageId) { TReadGuard rg(RWMutex); auto it = Constructed.find(storageId); if (it == Constructed.end()) { @@ -19,23 +37,61 @@ std::shared_ptr IStoragesManager::GetOper return it->second; } -std::shared_ptr IStoragesManager::InitializePortionOperator(const TPortionInfo& portionInfo) { - Y_ABORT_UNLESS(!portionInfo.HasStorageOperator()); - if (portionInfo.GetMeta().GetTierName()) { - return GetOperator(portionInfo.GetMeta().GetTierName()); - } else { - return GetOperator(DefaultStorageId); - } +std::shared_ptr 
IStoragesManager::GetOperator(const TString& storageId) { + return GetOperatorGuarantee(storageId); } -void IStoragesManager::OnTieringModified(const std::shared_ptr& tiers) { +void IStoragesManager::OnTieringModified(const std::shared_ptr& tiers) { + AFL_VERIFY(tiers); for (auto&& i : tiers->GetManagers()) { - GetOperator(i.second.GetTierName())->OnTieringModified(tiers); + GetOperatorGuarantee(i.first)->OnTieringModified(tiers); } } -void IStoragesManager::InitializeNecessaryStorages() { +void IStoragesManager::DoInitialize() { GetOperator(DefaultStorageId); + GetOperator(MemoryStorageId); +} + +bool IStoragesManager::LoadIdempotency(NTable::TDatabase& database) { + AFL_VERIFY(Initialized); + if (!DoLoadIdempotency(database)) { + return false; + } + TBlobManagerDb blobsDB(database); + for (auto&& i : GetStorages()) { + if (!i.second->Load(blobsDB)) { + return false; + } + } + GetOperatorVerified(DefaultStorageId); + GetSharedBlobsManager()->GetStorageManagerVerified(DefaultStorageId); + return true; +} + +bool IStoragesManager::HasBlobsToDelete() const { + for (auto&& i : Constructed) { + if (!i.second->GetBlobsToDelete().IsEmpty()) { + return true; + } + } + return false; +} + +std::shared_ptr IStoragesManager::BuildOperator(const TString& storageId) { + auto result = DoBuildOperator(storageId); + AFL_VERIFY(result)("storage_id", storageId); + return result; +} + +void IStoragesManager::Stop() { + AFL_VERIFY(!Finished); + if (Initialized && !Finished) { + for (auto&& i : Constructed) { + i.second->Stop(); + } + Finished = true; + } } } diff --git a/ydb/core/tx/columnshard/blobs_action/abstract/storages_manager.h b/ydb/core/tx/columnshard/blobs_action/abstract/storages_manager.h index dd89f7ff2512..cc0e8f606d3f 100644 --- a/ydb/core/tx/columnshard/blobs_action/abstract/storages_manager.h +++ b/ydb/core/tx/columnshard/blobs_action/abstract/storages_manager.h @@ -1,5 +1,6 @@ #pragma once #include "storage.h" +#include namespace NKikimr::NOlap { @@ -7,46 +8,60 @@ 
class TPortionInfo; class IStoragesManager { private: - TRWMutex RWMutex; + mutable TRWMutex RWMutex; + bool Initialized = false; + bool Finished = false; protected: virtual std::shared_ptr DoBuildOperator(const TString& storageId) = 0; THashMap> Constructed; - std::shared_ptr BuildOperator(const TString& storageId) { - auto result = DoBuildOperator(storageId); - Y_ABORT_UNLESS(result); - return result; - } + std::shared_ptr BuildOperator(const TString& storageId); + + virtual void DoInitialize(); + virtual bool DoLoadIdempotency(NTable::TDatabase& database) = 0; + virtual const std::shared_ptr& DoGetSharedBlobsManager() const = 0; - virtual void InitializeNecessaryStorages(); public: - static const inline TString DefaultStorageId = "__DEFAULT"; + static const inline TString DefaultStorageId = NBlobOperations::TGlobal::DefaultStorageId; + static const inline TString MemoryStorageId = NBlobOperations::TGlobal::MemoryStorageId; virtual ~IStoragesManager() = default; - IStoragesManager() = default; + void Initialize() { + AFL_VERIFY(!Initialized); + Initialized = true; + DoInitialize(); + } - void Stop() { - for (auto&& i : Constructed) { - i.second->Stop(); - } + IStoragesManager() = default; + const std::shared_ptr& GetSharedBlobsManager() const { + AFL_VERIFY(Initialized); + return DoGetSharedBlobsManager(); } - std::shared_ptr GetDefaultOperator() { - return GetOperator(DefaultStorageId); + bool LoadIdempotency(NTable::TDatabase& database); + + bool HasBlobsToDelete() const; + + void Stop(); + + std::shared_ptr GetDefaultOperator() const { + return GetOperatorVerified(DefaultStorageId); } - std::shared_ptr GetInsertOperator() { + std::shared_ptr GetInsertOperator() const { return GetDefaultOperator(); } const THashMap>& GetStorages() { - InitializeNecessaryStorages(); + AFL_VERIFY(Initialized); return Constructed; } - void OnTieringModified(const std::shared_ptr& tiers); + void OnTieringModified(const std::shared_ptr& tiers); std::shared_ptr GetOperator(const 
TString& storageIdExt); - std::shared_ptr InitializePortionOperator(const TPortionInfo& portionInfo); + std::shared_ptr GetOperatorGuarantee(const TString& storageIdExt); + std::shared_ptr GetOperatorVerified(const TString& storageIdExt) const; + std::shared_ptr GetOperatorOptional(const TString& storageIdExt) const; }; diff --git a/ydb/core/tx/columnshard/blobs_action/abstract/write.cpp b/ydb/core/tx/columnshard/blobs_action/abstract/write.cpp index 60a3635e3fd1..1319e97edf22 100644 --- a/ydb/core/tx/columnshard/blobs_action/abstract/write.cpp +++ b/ydb/core/tx/columnshard/blobs_action/abstract/write.cpp @@ -3,14 +3,21 @@ namespace NKikimr::NOlap { -TUnifiedBlobId IBlobsWritingAction::AddDataForWrite(const TString& data) { +TUnifiedBlobId IBlobsWritingAction::AddDataForWrite(const TString& data, const std::optional& externalBlobId) { Y_ABORT_UNLESS(!WritingStarted); auto blobId = AllocateNextBlobId(data); - AFL_VERIFY(BlobsForWrite.emplace(blobId, data).second); + AFL_TRACE(NKikimrServices::TX_COLUMNSHARD)("generated_blob_id", blobId.ToStringNew()); + AddDataForWrite(externalBlobId.value_or(blobId), data); + return externalBlobId.value_or(blobId); +} + +void IBlobsWritingAction::AddDataForWrite(const TUnifiedBlobId& blobId, const TString& data) { + AFL_VERIFY(blobId.IsValid())("blob_id", blobId.ToStringNew()); + AFL_VERIFY(blobId.BlobSize() == data.size()); + AFL_VERIFY(BlobsForWrite.emplace(blobId, data).second)("blob_id", blobId.ToStringNew()); BlobsWaiting.emplace(blobId); BlobsWriteCount += 1; SumSize += data.size(); - return blobId; } void IBlobsWritingAction::OnBlobWriteResult(const TUnifiedBlobId& blobId, const NKikimrProto::EReplyStatus status) { @@ -34,7 +41,7 @@ bool IBlobsWritingAction::IsReady() const { } IBlobsWritingAction::~IBlobsWritingAction() { - AFL_VERIFY(!NActors::TlsActivationContext || BlobsWaiting.empty() || Aborted); +// AFL_VERIFY(!NActors::TlsActivationContext || BlobsWaiting.empty() || Aborted); } void 
IBlobsWritingAction::SendWriteBlobRequest(const TString& data, const TUnifiedBlobId& blobId) { diff --git a/ydb/core/tx/columnshard/blobs_action/abstract/write.h b/ydb/core/tx/columnshard/blobs_action/abstract/write.h index 0ae4f57abb0c..f20d4183cefb 100644 --- a/ydb/core/tx/columnshard/blobs_action/abstract/write.h +++ b/ydb/core/tx/columnshard/blobs_action/abstract/write.h @@ -4,10 +4,10 @@ #include #include #include +#include namespace NKikimr::NColumnShard { class TColumnShard; -class TBlobManagerDb; } namespace NKikimr::NOlap { @@ -23,14 +23,15 @@ class IBlobsWritingAction: public ICommonBlobsAction { THashSet BlobsWaiting; bool Aborted = false; std::shared_ptr Counters; + void AddDataForWrite(const TUnifiedBlobId& blobId, const TString& data); protected: - virtual void DoOnExecuteTxBeforeWrite(NColumnShard::TColumnShard& self, NColumnShard::TBlobManagerDb& dbBlobs) = 0; + virtual void DoOnExecuteTxBeforeWrite(NColumnShard::TColumnShard& self, TBlobManagerDb& dbBlobs) = 0; virtual void DoOnCompleteTxBeforeWrite(NColumnShard::TColumnShard& self) = 0; virtual void DoSendWriteBlobRequest(const TString& data, const TUnifiedBlobId& blobId) = 0; virtual void DoOnBlobWriteResult(const TUnifiedBlobId& blobId, const NKikimrProto::EReplyStatus status) = 0; - virtual void DoOnExecuteTxAfterWrite(NColumnShard::TColumnShard& self, NColumnShard::TBlobManagerDb& dbBlobs, const bool blobsWroteSuccessfully) = 0; + virtual void DoOnExecuteTxAfterWrite(NColumnShard::TColumnShard& self, TBlobManagerDb& dbBlobs, const bool blobsWroteSuccessfully) = 0; virtual void DoOnCompleteTxAfterWrite(NColumnShard::TColumnShard& self, const bool blobsWroteSuccessfully) = 0; virtual TUnifiedBlobId AllocateNextBlobId(const TString& data) = 0; @@ -43,6 +44,14 @@ class IBlobsWritingAction: public ICommonBlobsAction { virtual ~IBlobsWritingAction(); bool IsReady() const; + void Merge(const std::shared_ptr& action) { + AFL_VERIFY(action); + AFL_VERIFY(!WritingStarted); + for (auto&& i : 
action->BlobsForWrite) { + AddDataForWrite(i.first, i.second); + } + } + void SetCounters(std::shared_ptr counters) { Counters = counters; } @@ -54,11 +63,10 @@ class IBlobsWritingAction: public ICommonBlobsAction { void Abort() { Aborted = true; } - TUnifiedBlobId AddDataForWrite(const TString& data); - + TUnifiedBlobId AddDataForWrite(const TString& data, const std::optional& externalBlobId = {}); void OnBlobWriteResult(const TUnifiedBlobId& blobId, const NKikimrProto::EReplyStatus status); - void OnExecuteTxBeforeWrite(NColumnShard::TColumnShard& self, NColumnShard::TBlobManagerDb& dbBlobs) { + void OnExecuteTxBeforeWrite(NColumnShard::TColumnShard& self, TBlobManagerDb& dbBlobs) { return DoOnExecuteTxBeforeWrite(self, dbBlobs); } @@ -75,7 +83,7 @@ class IBlobsWritingAction: public ICommonBlobsAction { return DoOnCompleteTxBeforeWrite(self); } - void OnExecuteTxAfterWrite(NColumnShard::TColumnShard& self, NColumnShard::TBlobManagerDb& dbBlobs, const bool blobsWroteSuccessfully) { + void OnExecuteTxAfterWrite(NColumnShard::TColumnShard& self, TBlobManagerDb& dbBlobs, const bool blobsWroteSuccessfully) { return DoOnExecuteTxAfterWrite(self, dbBlobs, blobsWroteSuccessfully); } @@ -86,4 +94,43 @@ class IBlobsWritingAction: public ICommonBlobsAction { void SendWriteBlobRequest(const TString& data, const TUnifiedBlobId& blobId); }; +class TWriteActionsCollection { +private: + THashMap> Actions; +public: + THashMap>::const_iterator begin() const { + return Actions.begin(); + } + + THashMap>::const_iterator end() const { + return Actions.end(); + } + + THashMap>::iterator begin() { + return Actions.begin(); + } + + THashMap>::iterator end() { + return Actions.end(); + } + + std::shared_ptr Add(const std::shared_ptr& action) { + auto it = Actions.find(action->GetStorageId()); + if (it == Actions.end()) { + return Actions.emplace(action->GetStorageId(), action).first->second; + } else if (action.get() != it->second.get()) { + it->second->Merge(action); + } + return 
it->second; + } + + TWriteActionsCollection() = default; + + TWriteActionsCollection(const std::vector>& actions) { + for (auto&& a : actions) { + Add(a); + } + } +}; + } diff --git a/ydb/core/tx/columnshard/blobs_action/abstract/ya.make b/ydb/core/tx/columnshard/blobs_action/abstract/ya.make index 2f0b074a602a..b3b4c20028c8 100644 --- a/ydb/core/tx/columnshard/blobs_action/abstract/ya.make +++ b/ydb/core/tx/columnshard/blobs_action/abstract/ya.make @@ -2,7 +2,9 @@ LIBRARY() SRCS( gc.cpp + gc_actor.cpp common.cpp + blob_set.cpp read.cpp write.cpp remove.cpp @@ -15,7 +17,11 @@ PEERDIR( ydb/core/protos contrib/libs/apache/arrow ydb/core/tablet_flat - ydb/core/tx/tiering + ydb/core/tx/tiering/abstract + ydb/core/tx/columnshard/blobs_action/common + ydb/core/tx/columnshard/data_sharing/protos + ydb/core/tx/columnshard/blobs_action/events + ydb/core/tx/columnshard/blobs_action/protos ) END() diff --git a/ydb/core/tx/columnshard/blobs_action/blob_manager_db.cpp b/ydb/core/tx/columnshard/blobs_action/blob_manager_db.cpp index b13e664f6419..3bc5f2b8c3d6 100644 --- a/ydb/core/tx/columnshard/blobs_action/blob_manager_db.cpp +++ b/ydb/core/tx/columnshard/blobs_action/blob_manager_db.cpp @@ -1,14 +1,17 @@ #include "blob_manager_db.h" #include +#include -namespace NKikimr::NColumnShard { +namespace NKikimr::NOlap { + +using namespace NKikimr::NColumnShard; bool TBlobManagerDb::LoadLastGcBarrier(TGenStep& lastCollectedGenStep) { NIceDb::TNiceDb db(Database); ui64 gen = 0; ui64 step = 0; - if (!Schema::GetSpecialValue(db, Schema::EValueIds::LastGcBarrierGen, gen) || - !Schema::GetSpecialValue(db, Schema::EValueIds::LastGcBarrierStep, step)) + if (!Schema::GetSpecialValueOpt(db, Schema::EValueIds::LastGcBarrierGen, gen) || + !Schema::GetSpecialValueOpt(db, Schema::EValueIds::LastGcBarrierStep, step)) { return false; } @@ -22,11 +25,11 @@ void TBlobManagerDb::SaveLastGcBarrier(const TGenStep& lastCollectedGenStep) { Schema::SaveSpecialValue(db, Schema::EValueIds::LastGcBarrierStep, 
std::get<1>(lastCollectedGenStep)); } -bool TBlobManagerDb::LoadLists(std::vector& blobsToKeep, std::vector& blobsToDelete, - const NOlap::IBlobGroupSelector* dsGroupSelector) +bool TBlobManagerDb::LoadLists(std::vector& blobsToKeep, TTabletsByBlob& blobsToDelete, + const IBlobGroupSelector* dsGroupSelector, const TTabletId selfTabletId) { blobsToKeep.clear(); - blobsToDelete.clear(); + TTabletsByBlob blobsToDeleteLocal; NIceDb::TNiceDb db(Database); @@ -39,12 +42,13 @@ bool TBlobManagerDb::LoadLists(std::vector& blobsToKeep, while (!rowset.EndOfSet()) { const TString blobIdStr = rowset.GetValue(); - NOlap::TUnifiedBlobId unifiedBlobId = NOlap::TUnifiedBlobId::ParseFromString(blobIdStr, dsGroupSelector, error); - Y_ABORT_UNLESS(unifiedBlobId.IsValid(), "%s", error.c_str()); + TUnifiedBlobId unifiedBlobId = TUnifiedBlobId::ParseFromString(blobIdStr, dsGroupSelector, error); + AFL_VERIFY(unifiedBlobId.IsValid())("event", "cannot_parse_blob")("error", error)("original_string", blobIdStr); blobsToKeep.push_back(unifiedBlobId); - if (!rowset.Next()) + if (!rowset.Next()) { return false; + } } } @@ -57,102 +61,174 @@ bool TBlobManagerDb::LoadLists(std::vector& blobsToKeep, while (!rowset.EndOfSet()) { const TString blobIdStr = rowset.GetValue(); - NOlap::TUnifiedBlobId unifiedBlobId = NOlap::TUnifiedBlobId::ParseFromString(blobIdStr, dsGroupSelector, error); - Y_ABORT_UNLESS(unifiedBlobId.IsValid(), "%s", error.c_str()); - blobsToDelete.push_back(unifiedBlobId); - if (!rowset.Next()) + TUnifiedBlobId unifiedBlobId = TUnifiedBlobId::ParseFromString(blobIdStr, dsGroupSelector, error); + AFL_VERIFY(unifiedBlobId.IsValid())("event", "cannot_parse_blob")("error", error)("original_string", blobIdStr); + blobsToDeleteLocal.Add(selfTabletId, unifiedBlobId); + if (!rowset.Next()) { + return false; + } + } + } + + { + auto rowset = db.Table().Select(); + if (!rowset.IsReady()) + return false; + + TString error; + + while (!rowset.EndOfSet()) { + const TString blobIdStr = 
rowset.GetValue(); + TUnifiedBlobId unifiedBlobId = TUnifiedBlobId::ParseFromString(blobIdStr, dsGroupSelector, error); + AFL_VERIFY(unifiedBlobId.IsValid())("event", "cannot_parse_blob")("error", error)("original_string", blobIdStr); + blobsToDeleteLocal.Add((TTabletId)rowset.GetValue(), unifiedBlobId); + if (!rowset.Next()) { return false; + } } } + std::swap(blobsToDeleteLocal, blobsToDelete); return true; } -void TBlobManagerDb::AddBlobToKeep(const NOlap::TUnifiedBlobId& blobId) { +void TBlobManagerDb::AddBlobToKeep(const TUnifiedBlobId& blobId) { NIceDb::TNiceDb db(Database); db.Table().Key(blobId.ToStringLegacy()).Update(); } -void TBlobManagerDb::EraseBlobToKeep(const NOlap::TUnifiedBlobId& blobId) { +void TBlobManagerDb::EraseBlobToKeep(const TUnifiedBlobId& blobId) { NIceDb::TNiceDb db(Database); db.Table().Key(blobId.ToStringLegacy()).Delete(); db.Table().Key(blobId.ToStringNew()).Delete(); } -void TBlobManagerDb::AddBlobToDelete(const NOlap::TUnifiedBlobId& blobId) { +void TBlobManagerDb::AddBlobToDelete(const TUnifiedBlobId& blobId, const TTabletId tabletId) { NIceDb::TNiceDb db(Database); + db.Table().Key(blobId.ToStringLegacy(), (ui64)tabletId).Update(); db.Table().Key(blobId.ToStringLegacy()).Update(); } -void TBlobManagerDb::EraseBlobToDelete(const NOlap::TUnifiedBlobId& blobId) { +void TBlobManagerDb::EraseBlobToDelete(const TUnifiedBlobId& blobId, const TTabletId tabletId) { NIceDb::TNiceDb db(Database); db.Table().Key(blobId.ToStringLegacy()).Delete(); db.Table().Key(blobId.ToStringNew()).Delete(); + db.Table().Key(blobId.ToStringLegacy(), (ui64)tabletId).Delete(); + db.Table().Key(blobId.ToStringNew(), (ui64)tabletId).Delete(); } -bool TBlobManagerDb::LoadTierLists(const TString& storageId, std::deque& blobsToDelete, std::deque& draftBlobsToDelete) { - draftBlobsToDelete.clear(); - blobsToDelete.clear(); +bool TBlobManagerDb::LoadTierLists(const TString& storageId, TTabletsByBlob& blobsToDelete, std::deque& draftBlobsToDelete, const TTabletId 
selfTabletId) { + TTabletsByBlob localBlobsToDelete; + std::deque localDraftBlobsToDelete; NIceDb::TNiceDb db(Database); { auto rowset = db.Table().Prefix(storageId).Select(); - if (!rowset.IsReady()) + if (!rowset.IsReady()) { return false; + } TString error; while (!rowset.EndOfSet()) { const TString blobIdStr = rowset.GetValue(); - NOlap::TUnifiedBlobId unifiedBlobId = NOlap::TUnifiedBlobId::ParseFromString(blobIdStr, nullptr, error); - Y_ABORT_UNLESS(unifiedBlobId.IsValid(), "%s", error.c_str()); + TUnifiedBlobId unifiedBlobId = TUnifiedBlobId::ParseFromString(blobIdStr, nullptr, error); + AFL_VERIFY(unifiedBlobId.IsValid())("event", "cannot_parse_blob")("error", error)("original_string", blobIdStr); + + localBlobsToDelete.Add(selfTabletId, unifiedBlobId); + if (!rowset.Next()) { + return false; + } + } + } + + { + auto rowset = db.Table().Prefix(storageId).Select(); + if (!rowset.IsReady()) { + return false; + } + + TString error; + + while (!rowset.EndOfSet()) { + const TString blobIdStr = rowset.GetValue(); + TUnifiedBlobId unifiedBlobId = TUnifiedBlobId::ParseFromString(blobIdStr, nullptr, error); + AFL_VERIFY(unifiedBlobId.IsValid())("event", "cannot_parse_blob")("error", error)("original_string", blobIdStr); - blobsToDelete.emplace_back(std::move(unifiedBlobId)); - if (!rowset.Next()) + localBlobsToDelete.Add((TTabletId)rowset.GetValue(), unifiedBlobId); + if (!rowset.Next()) { return false; + } } } { auto rowset = db.Table().Prefix(storageId).Select(); - if (!rowset.IsReady()) + if (!rowset.IsReady()) { return false; + } TString error; while (!rowset.EndOfSet()) { const TString blobIdStr = rowset.GetValue(); - NOlap::TUnifiedBlobId unifiedBlobId = NOlap::TUnifiedBlobId::ParseFromString(blobIdStr, nullptr, error); - Y_ABORT_UNLESS(unifiedBlobId.IsValid(), "%s", error.c_str()); + TUnifiedBlobId unifiedBlobId = TUnifiedBlobId::ParseFromString(blobIdStr, nullptr, error); + AFL_VERIFY(unifiedBlobId.IsValid())("event", "cannot_parse_blob")("error", 
error)("original_string", blobIdStr); - draftBlobsToDelete.emplace_back(std::move(unifiedBlobId)); - if (!rowset.Next()) + localDraftBlobsToDelete.emplace_back(std::move(unifiedBlobId)); + if (!rowset.Next()) { return false; + } } } + + std::swap(localBlobsToDelete, blobsToDelete); + std::swap(localDraftBlobsToDelete, draftBlobsToDelete); return true; } -void TBlobManagerDb::AddTierBlobToDelete(const TString& storageId, const NOlap::TUnifiedBlobId& blobId) { +void TBlobManagerDb::AddTierBlobToDelete(const TString& storageId, const TUnifiedBlobId& blobId, const TTabletId tabletId) { NIceDb::TNiceDb db(Database); + db.Table().Key(storageId, blobId.ToStringNew(), (ui64)tabletId).Update(); db.Table().Key(storageId, blobId.ToStringNew()).Update(); } -void TBlobManagerDb::RemoveTierBlobToDelete(const TString& storageId, const NOlap::TUnifiedBlobId& blobId) { +void TBlobManagerDb::RemoveTierBlobToDelete(const TString& storageId, const TUnifiedBlobId& blobId, const TTabletId tabletId) { NIceDb::TNiceDb db(Database); + db.Table().Key(storageId, blobId.ToStringNew(), (ui64)tabletId).Delete(); db.Table().Key(storageId, blobId.ToStringNew()).Delete(); } -void TBlobManagerDb::AddTierDraftBlobId(const TString& storageId, const NOlap::TUnifiedBlobId& blobId) { +void TBlobManagerDb::AddTierDraftBlobId(const TString& storageId, const TUnifiedBlobId& blobId) { NIceDb::TNiceDb db(Database); db.Table().Key(storageId, blobId.ToStringNew()).Update(); } -void TBlobManagerDb::RemoveTierDraftBlobId(const TString& storageId, const NOlap::TUnifiedBlobId& blobId) { +void TBlobManagerDb::RemoveTierDraftBlobId(const TString& storageId, const TUnifiedBlobId& blobId) { NIceDb::TNiceDb db(Database); db.Table().Key(storageId, blobId.ToStringNew()).Delete(); } +void TBlobManagerDb::RemoveBlobSharing(const TString& storageId, const TUnifiedBlobId& blobId, const TTabletId tabletId) { + NIceDb::TNiceDb db(Database); + db.Table().Key(storageId, blobId.ToStringNew(), (ui64)tabletId).Delete(); +} + +void 
TBlobManagerDb::AddBlobSharing(const TString& storageId, const TUnifiedBlobId& blobId, const TTabletId tabletId) { + NIceDb::TNiceDb db(Database); + db.Table().Key(storageId, blobId.ToStringNew(), (ui64)tabletId).Update(); +} + +void TBlobManagerDb::RemoveBorrowedBlob(const TString& storageId, const TUnifiedBlobId& blobId) { + NIceDb::TNiceDb db(Database); + db.Table().Key(storageId, blobId.ToStringNew()).Delete(); +} + +void TBlobManagerDb::AddBorrowedBlob(const TString& storageId, const TUnifiedBlobId& blobId, const TTabletId tabletId) { + NIceDb::TNiceDb db(Database); + db.Table().Key(storageId, blobId.ToStringNew()).Update(NIceDb::TUpdate((ui64)tabletId)); +} + } diff --git a/ydb/core/tx/columnshard/blobs_action/blob_manager_db.h b/ydb/core/tx/columnshard/blobs_action/blob_manager_db.h index bdfc4ddd12f4..6c52e6c9ff2f 100644 --- a/ydb/core/tx/columnshard/blobs_action/blob_manager_db.h +++ b/ydb/core/tx/columnshard/blobs_action/blob_manager_db.h @@ -1,13 +1,16 @@ #pragma once +#include "abstract/blob_set.h" +#include "abstract/common.h" #include #include #include +#include namespace NKikimr::NTable { class TDatabase; } -namespace NKikimr::NColumnShard { +namespace NKikimr::NOlap { // Garbage Collection generation and step using TGenStep = std::tuple; @@ -16,22 +19,29 @@ class IBlobManagerDb { public: virtual ~IBlobManagerDb() = default; - virtual bool LoadLastGcBarrier(TGenStep& lastCollectedGenStep) = 0; + [[nodiscard]] virtual bool LoadLastGcBarrier(TGenStep& lastCollectedGenStep) = 0; virtual void SaveLastGcBarrier(const TGenStep& lastCollectedGenStep) = 0; - virtual bool LoadLists(std::vector& blobsToKeep, std::vector& blobsToDelete, - const NOlap::IBlobGroupSelector* dsGroupSelector) = 0; - virtual void AddBlobToKeep(const NOlap::TUnifiedBlobId& blobId) = 0; - virtual void EraseBlobToKeep(const NOlap::TUnifiedBlobId& blobId) = 0; - virtual void AddBlobToDelete(const NOlap::TUnifiedBlobId& blobId) = 0; - virtual void EraseBlobToDelete(const 
NOlap::TUnifiedBlobId& blobId) = 0; + [[nodiscard]] virtual bool LoadLists(std::vector& blobsToKeep, TTabletsByBlob& blobsToDelete, + const IBlobGroupSelector* dsGroupSelector, const TTabletId selfTabletId) = 0; + virtual void AddBlobToKeep(const TUnifiedBlobId& blobId) = 0; + virtual void EraseBlobToKeep(const TUnifiedBlobId& blobId) = 0; - virtual bool LoadTierLists(const TString& storageId, std::deque& blobsToDelete, std::deque& draftBlobsToDelete) = 0; + virtual void AddBlobToDelete(const TUnifiedBlobId& blobId, const TTabletId tabletId) = 0; + virtual void EraseBlobToDelete(const TUnifiedBlobId& blobId, const TTabletId tabletId) = 0; - virtual void AddTierBlobToDelete(const TString& storageId, const NOlap::TUnifiedBlobId& blobId) = 0; - virtual void RemoveTierBlobToDelete(const TString& storageId, const NOlap::TUnifiedBlobId& blobId) = 0; - virtual void AddTierDraftBlobId(const TString& storageId, const NOlap::TUnifiedBlobId& blobId) = 0; - virtual void RemoveTierDraftBlobId(const TString& storageId, const NOlap::TUnifiedBlobId& blobId) = 0; + [[nodiscard]] virtual bool LoadTierLists(const TString& storageId, TTabletsByBlob& blobsToDelete, std::deque& draftBlobsToDelete, const TTabletId selfTabletId) = 0; + + virtual void AddTierBlobToDelete(const TString& storageId, const TUnifiedBlobId& blobId, const TTabletId tabletId) = 0; + virtual void RemoveTierBlobToDelete(const TString& storageId, const TUnifiedBlobId& blobId, const TTabletId tabletId) = 0; + virtual void AddTierDraftBlobId(const TString& storageId, const TUnifiedBlobId& blobId) = 0; + virtual void RemoveTierDraftBlobId(const TString& storageId, const TUnifiedBlobId& blobId) = 0; + + virtual void AddBlobSharing(const TString& storageId, const TUnifiedBlobId& blobId, const TTabletId tabletId) = 0; + virtual void RemoveBlobSharing(const TString& storageId, const TUnifiedBlobId& blobId, const TTabletId tabletId) = 0; + + virtual void AddBorrowedBlob(const TString& storageId, const TUnifiedBlobId& blobId, 
const TTabletId tabletId) = 0; + virtual void RemoveBorrowedBlob(const TString& storageId, const TUnifiedBlobId& blobId) = 0; }; @@ -41,25 +51,34 @@ class TBlobManagerDb : public IBlobManagerDb { : Database(db) {} - bool LoadLastGcBarrier(TGenStep& lastCollectedGenStep) override; + [[nodiscard]] bool LoadLastGcBarrier(TGenStep& lastCollectedGenStep) override; void SaveLastGcBarrier(const TGenStep& lastCollectedGenStep) override; - bool LoadLists(std::vector& blobsToKeep, std::vector& blobsToDelete, - const NOlap::IBlobGroupSelector* dsGroupSelector) override; + [[nodiscard]] bool LoadLists(std::vector& blobsToKeep, TTabletsByBlob& blobsToDelete, + const IBlobGroupSelector* dsGroupSelector, const TTabletId selfTabletId) override; + + void AddBlobToKeep(const TUnifiedBlobId& blobId) override; + void EraseBlobToKeep(const TUnifiedBlobId& blobId) override; + void AddBlobToDelete(const TUnifiedBlobId& blobId, const TTabletId tabletId) override; + void EraseBlobToDelete(const TUnifiedBlobId& blobId, const TTabletId tabletId) override; + + [[nodiscard]] bool LoadTierLists(const TString& storageId, TTabletsByBlob& blobsToDelete, std::deque& draftBlobsToDelete, const TTabletId selfTabletId) override; - void AddBlobToKeep(const NOlap::TUnifiedBlobId& blobId) override; - void EraseBlobToKeep(const NOlap::TUnifiedBlobId& blobId) override; - void AddBlobToDelete(const NOlap::TUnifiedBlobId& blobId) override; - void EraseBlobToDelete(const NOlap::TUnifiedBlobId& blobId) override; + void AddTierBlobToDelete(const TString& storageId, const TUnifiedBlobId& blobId, const TTabletId tabletId) override; + void RemoveTierBlobToDelete(const TString& storageId, const TUnifiedBlobId& blobId, const TTabletId tabletId) override; - bool LoadTierLists(const TString& storageId, std::deque& blobsToDelete, std::deque& draftBlobsToDelete) override; + void AddTierDraftBlobId(const TString& storageId, const TUnifiedBlobId& blobId) override; + void RemoveTierDraftBlobId(const TString& storageId, 
const TUnifiedBlobId& blobId) override; - void AddTierBlobToDelete(const TString& storageId, const NOlap::TUnifiedBlobId& blobId) override; + void AddBlobSharing(const TString& storageId, const TUnifiedBlobId& blobId, const TTabletId tabletId) override; + void RemoveBlobSharing(const TString& storageId, const TUnifiedBlobId& blobId, const TTabletId tabletId) override; - void RemoveTierBlobToDelete(const TString& storageId, const NOlap::TUnifiedBlobId& blobId) override; + void AddBorrowedBlob(const TString& storageId, const TUnifiedBlobId& blobId, const TTabletId tabletId) override; + void RemoveBorrowedBlob(const TString& storageId, const TUnifiedBlobId& blobId) override; - void AddTierDraftBlobId(const TString& storageId, const NOlap::TUnifiedBlobId& blobId) override; - void RemoveTierDraftBlobId(const TString& storageId, const NOlap::TUnifiedBlobId& blobId) override; + NTable::TDatabase& GetDatabase() { + return Database; + } private: NTable::TDatabase& Database; diff --git a/ydb/core/tx/columnshard/blob_manager.cpp b/ydb/core/tx/columnshard/blobs_action/bs/blob_manager.cpp similarity index 52% rename from ydb/core/tx/columnshard/blob_manager.cpp rename to ydb/core/tx/columnshard/blobs_action/bs/blob_manager.cpp index 83aa1a03f1fc..ed3810ae1edf 100644 --- a/ydb/core/tx/columnshard/blob_manager.cpp +++ b/ydb/core/tx/columnshard/blobs_action/bs/blob_manager.cpp @@ -1,13 +1,10 @@ -#include "defs.h" -#include "columnshard_impl.h" #include "blob_manager.h" -#include "blob_cache.h" #include #include -#include "blobs_action/bs/gc.h" +#include "gc.h" -namespace NKikimr::NColumnShard { +namespace NKikimr::NOlap { TLogoBlobID ParseLogoBlobId(TString blobId) { TLogoBlobID logoBlobId; @@ -28,7 +25,7 @@ struct TBlobBatch::TBatchInfo : TNonCopyable { TIntrusivePtr TabletInfo; TAllocatedGenStepConstPtr GenStepRef; - const TBlobsManagerCounters Counters; + const NColumnShard::TBlobsManagerCounters Counters; const ui32 Gen; const ui32 Step; const ui32 Channel; @@ -37,7 +34,7 @@ 
struct TBlobBatch::TBatchInfo : TNonCopyable { i32 InFlightCount; ui64 TotalSizeBytes; - TBatchInfo(TIntrusivePtr tabletInfo, TAllocatedGenStepConstPtr genStep, ui32 channel, const TBlobsManagerCounters& counters) + TBatchInfo(TIntrusivePtr tabletInfo, TAllocatedGenStepConstPtr genStep, ui32 channel, const NColumnShard::TBlobsManagerCounters& counters) : TabletInfo(tabletInfo) , GenStepRef(genStep) , Counters(counters) @@ -83,7 +80,7 @@ void TBlobBatch::SendWriteRequest(const TActorContext& ctx, ui32 groupId, const } void TBlobBatch::SendWriteBlobRequest(const TString& blobData, const TUnifiedBlobId& blobId, TInstant deadline, const TActorContext& ctx) { - Y_ABORT_UNLESS(blobData.size() <= TLimits::GetBlobSizeLimit(), "Blob %" PRISZT" size exceeds the limit %" PRIu64, blobData.size(), TLimits::GetBlobSizeLimit()); + Y_ABORT_UNLESS(blobData.size() <= NColumnShard::TLimits::GetBlobSizeLimit(), "Blob %" PRISZT" size exceeds the limit %" PRIu64, blobData.size(), NColumnShard::TLimits::GetBlobSizeLimit()); const ui32 groupId = blobId.GetDsGroup(); SendWriteRequest(ctx, groupId, blobId.GetLogoBlobId(), blobData, 0, deadline); @@ -125,8 +122,9 @@ TUnifiedBlobId TBlobBatch::AllocateNextBlobId(const TString& blobData) { return BatchInfo->NextBlobId(blobData.size()); } -TBlobManager::TBlobManager(TIntrusivePtr tabletInfo, ui32 gen) - : TabletInfo(tabletInfo) +TBlobManager::TBlobManager(TIntrusivePtr tabletInfo, ui32 gen, const TTabletId selfTabletId) + : SelfTabletId(selfTabletId) + , TabletInfo(tabletInfo) , CurrentGen(gen) , CurrentStep(0) , BlobCountToTriggerGC(BLOB_COUNT_TO_TRIGGER_GC_DEFAULT, 0, Max()) @@ -138,7 +136,7 @@ void TBlobManager::RegisterControls(NKikimr::TControlBoard& icb) { icb.RegisterSharedControl(GCIntervalSeconds, "ColumnShardControls.GCIntervalSeconds"); } -bool TBlobManager::LoadState(IBlobManagerDb& db) { +bool TBlobManager::LoadState(IBlobManagerDb& db, const TTabletId selfTabletId) { // Load last collected Generation if 
(!db.LoadLastGcBarrier(LastCollectedGenStep)) { return false; @@ -146,37 +144,29 @@ bool TBlobManager::LoadState(IBlobManagerDb& db) { // Load the keep and delete queues std::vector blobsToKeep; - std::vector blobsToDelete; - TBlobGroupSelector dsGroupSelector(TabletInfo); - if (!db.LoadLists(blobsToKeep, blobsToDelete, &dsGroupSelector)) { + NColumnShard::TBlobGroupSelector dsGroupSelector(TabletInfo); + if (!db.LoadLists(blobsToKeep, BlobsToDelete, &dsGroupSelector, selfTabletId)) { return false; } - for (const auto& unifiedBlobId : blobsToDelete) { - if (unifiedBlobId.IsDsBlob()) { - BlobsToDelete.insert(unifiedBlobId.GetLogoBlobId()); - BlobsManagerCounters.OnDeleteBlobMarker(unifiedBlobId.BlobSize()); - } else { - Y_ABORT("Unexpected blob id: %s", unifiedBlobId.ToStringNew().c_str()); - } + for (auto it = BlobsToDelete.GetIterator(); it.IsValid(); ++it) { + BlobsManagerCounters.OnDeleteBlobMarker(it.GetBlobId().BlobSize()); } BlobsManagerCounters.OnBlobsDelete(BlobsToDelete); // Build the list of steps that cannot be garbage collected before Keep flag is set on the blobs THashSet genStepsWithBlobsToKeep; for (const auto& unifiedBlobId : blobsToKeep) { - Y_ABORT_UNLESS(unifiedBlobId.IsDsBlob(), "Not a DS blob id in Keep table: %s", unifiedBlobId.ToStringNew().c_str()); - TLogoBlobID blobId = unifiedBlobId.GetLogoBlobId(); TGenStep genStep{blobId.Generation(), blobId.Step()}; Y_ABORT_UNLESS(genStep > LastCollectedGenStep); BlobsToKeep.insert(blobId); BlobsManagerCounters.OnKeepMarker(blobId.BlobSize()); - + const ui64 groupId = dsGroupSelector.GetGroup(blobId); // Keep + DontKeep (probably in different gen:steps) // GC could go through it to a greater LastCollectedGenStep - if (BlobsToDelete.contains(blobId)) { + if (BlobsToDelete.Contains(SelfTabletId, TUnifiedBlobId(groupId, blobId))) { continue; } @@ -197,44 +187,51 @@ bool TBlobManager::LoadState(IBlobManagerDb& db) { return true; } -TGenStep TBlobManager::FindNewGCBarrier() { +void 
TBlobManager::PopGCBarriers(const TGenStep gs) { + while (AllocatedGenSteps.size() && AllocatedGenSteps.front()->GenStep <= gs) { + AllocatedGenSteps.pop_front(); + } +} + +std::vector TBlobManager::FindNewGCBarriers() { + AFL_VERIFY(!CollectGenStepInFlight); TGenStep newCollectGenStep = LastCollectedGenStep; - size_t numFinished = 0; + std::vector result; + if (AllocatedGenSteps.empty()) { + return {TGenStep(CurrentGen, CurrentStep)}; + } for (auto& allocated : AllocatedGenSteps) { + AFL_VERIFY(allocated->GenStep > newCollectGenStep); if (!allocated->Finished()) { break; } - - ++numFinished; + result.emplace_back(allocated->GenStep); newCollectGenStep = allocated->GenStep; - Y_ABORT_UNLESS(newCollectGenStep > CollectGenStepInFlight); - } - if (numFinished) { - AllocatedGenSteps.erase(AllocatedGenSteps.begin(), AllocatedGenSteps.begin() + numFinished); } - - if (AllocatedGenSteps.empty()) { - newCollectGenStep = TGenStep{CurrentGen, CurrentStep}; - } - return newCollectGenStep; + return result; } -std::shared_ptr TBlobManager::BuildGCTask(const TString& storageId, const std::shared_ptr& manager) { - if (BlobsToKeep.empty() && BlobsToDelete.empty() && LastCollectedGenStep == TGenStep{CurrentGen, CurrentStep}) { +std::shared_ptr TBlobManager::BuildGCTask(const TString& storageId, + const std::shared_ptr& manager, const std::shared_ptr& sharedBlobsInfo, + const std::shared_ptr& counters) noexcept { + AFL_VERIFY(!CollectGenStepInFlight); + if (BlobsToKeep.empty() && BlobsToDelete.IsEmpty() && LastCollectedGenStep == TGenStep{CurrentGen, CurrentStep}) { ACFL_DEBUG("event", "TBlobManager::BuildGCTask skip")("current_gen", CurrentGen)("current_step", CurrentStep); return nullptr; } + std::vector newCollectGenSteps = FindNewGCBarriers(); - TGenStep newCollectGenStep = FindNewGCBarrier(); - Y_ABORT_UNLESS(newCollectGenStep >= LastCollectedGenStep); + if (newCollectGenSteps.size()) { + if (AllocatedGenSteps.size()) { + AFL_VERIFY(newCollectGenSteps.front() > 
LastCollectedGenStep); + } else { + AFL_VERIFY(newCollectGenSteps.front() == LastCollectedGenStep); + } + } PreviousGCTime = AppData()->TimeProvider->Now(); - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "PreparePerGroupGCRequests")("gen", std::get<0>(newCollectGenStep))("step", std::get<1>(newCollectGenStep)); - BlobsManagerCounters.OnNewCollectStep(std::get<0>(newCollectGenStep), std::get<1>(newCollectGenStep)); const ui32 channelIdx = BLOB_CHANNEL; - - NOlap::NBlobOperations::NBlobStorage::TGCTask::TGCListsByGroup perGroupGCListsInFlight; - + NBlobOperations::NBlobStorage::TGCTask::TGCListsByGroup perGroupGCListsInFlight; // Clear all possibly not kept trash in channel's groups: create an event for each group if (FirstGC) { FirstGC = false; @@ -247,64 +244,119 @@ std::shared_ptr TBlobManager::Bui } } - // Make per-group Keep/DontKeep lists - std::deque keepsToErase; - std::deque deletesToErase; - { - // Add all blobs to keep - auto keepBlobIt = BlobsToKeep.begin(); - for (; keepBlobIt != BlobsToKeep.end(); ++keepBlobIt) { - TGenStep genStep{keepBlobIt->Generation(), keepBlobIt->Step()}; - if (genStep > newCollectGenStep) { - break; - } - ui32 blobGroup = TabletInfo->GroupFor(keepBlobIt->Channel(), keepBlobIt->Generation()); - perGroupGCListsInFlight[blobGroup].KeepList.insert(*keepBlobIt); - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("to_keep_gc", *keepBlobIt); + static const ui32 blobsGCCountLimit = 50000; + + const auto predShared = [&](const TUnifiedBlobId& id, const THashSet& /*tabletIds*/) { + return id.GetLogoBlobId().TabletID() != (ui64)SelfTabletId; + }; + + TTabletsByBlob extractedToRemoveFromDB = BlobsToDelete.ExtractBlobs(predShared, blobsGCCountLimit); + if (extractedToRemoveFromDB.GetSize() >= blobsGCCountLimit) { + newCollectGenSteps.clear(); + } else { + const auto predRemoveOld = [&](const TUnifiedBlobId& id, const THashSet& /*tabletIds*/) { + auto logoBlobId = id.GetLogoBlobId(); + TGenStep genStep{logoBlobId.Generation(), 
logoBlobId.Step()}; + return genStep < LastCollectedGenStep && id.GetLogoBlobId().TabletID() == (ui64)SelfTabletId; + }; + + TTabletsByBlob extractedOld = BlobsToDelete.ExtractBlobs(predRemoveOld, blobsGCCountLimit - extractedToRemoveFromDB.GetSize()); + extractedToRemoveFromDB.Add(extractedOld); + TTabletId tabletId; + TUnifiedBlobId unifiedBlobId; + while (extractedOld.ExtractFront(tabletId, unifiedBlobId)) { + auto logoBlobId = unifiedBlobId.GetLogoBlobId(); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("to_delete_gc", logoBlobId); + NBlobOperations::NBlobStorage::TGCTask::TGCLists& gl = perGroupGCListsInFlight[unifiedBlobId.GetDsGroup()]; + BlobsManagerCounters.OnCollectDropExplicit(logoBlobId.BlobSize()); + gl.DontKeepList.insert(logoBlobId); } - BlobsToKeep.erase(BlobsToKeep.begin(), keepBlobIt); - BlobsManagerCounters.OnBlobsKeep(BlobsToKeep); - - // Add all blobs to delete - auto blobIt = BlobsToDelete.begin(); - for (; blobIt != BlobsToDelete.end(); ++blobIt) { - TGenStep genStep{blobIt->Generation(), blobIt->Step()}; - if (genStep > newCollectGenStep) { - break; - } - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("to_delete_gc", *blobIt); - ui32 blobGroup = TabletInfo->GroupFor(blobIt->Channel(), blobIt->Generation()); - NOlap::NBlobOperations::NBlobStorage::TGCTask::TGCLists& gl = perGroupGCListsInFlight[blobGroup]; - bool skipDontKeep = false; - if (gl.KeepList.erase(*blobIt)) { - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("to_keep_gc_remove", *blobIt); - // Skipped blobs still need to be deleted from BlobsToKeep table - keepsToErase.emplace_back(TUnifiedBlobId(blobGroup, *blobIt)); - - if (CurrentGen == blobIt->Generation()) { - // If this blob was created and deleted in the current generation then - // we can skip sending both Keep and DontKeep flags. 
- // NOTE: its not safe to do this for older generations because there is - // a scenario when Keep flag was sent in the old generation and then tablet restarted - // before getting the result and removing the blob from the Keep list. - skipDontKeep = true; - deletesToErase.emplace_back(TUnifiedBlobId(blobGroup, *blobIt)); - ++CountersUpdate.BlobSkippedEntries; + if (extractedToRemoveFromDB.GetSize() >= blobsGCCountLimit) { + newCollectGenSteps.clear(); + } + } + + + std::deque keepsToErase; + for (auto&& newCollectGenStep : newCollectGenSteps) { + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "PreparePerGroupGCRequests")("gen", std::get<0>(newCollectGenStep))("step", std::get<1>(newCollectGenStep)); + BlobsManagerCounters.OnNewCollectStep(std::get<0>(newCollectGenStep), std::get<1>(newCollectGenStep)); + + // Make per-group Keep/DontKeep lists + + { + // Add all blobs to keep + auto keepBlobIt = BlobsToKeep.begin(); + for (; keepBlobIt != BlobsToKeep.end(); ++keepBlobIt) { + TGenStep genStep{keepBlobIt->Generation(), keepBlobIt->Step()}; + AFL_VERIFY(genStep > LastCollectedGenStep); + if (genStep > newCollectGenStep) { + break; } + ui32 blobGroup = TabletInfo->GroupFor(keepBlobIt->Channel(), keepBlobIt->Generation()); + perGroupGCListsInFlight[blobGroup].KeepList.insert(*keepBlobIt); + keepsToErase.emplace_back(TUnifiedBlobId(blobGroup, *keepBlobIt)); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("to_keep_gc", *keepBlobIt); } - if (!skipDontKeep) { - BlobsManagerCounters.OnCollectDropExplicit(blobIt->BlobSize()); - gl.DontKeepList.insert(*blobIt); - } else { - BlobsManagerCounters.OnCollectDropImplicit(blobIt->BlobSize()); + BlobsToKeep.erase(BlobsToKeep.begin(), keepBlobIt); + BlobsManagerCounters.OnBlobsKeep(BlobsToKeep); + + const auto predSelf = [&](const TUnifiedBlobId& id, const THashSet& /*tabletIds*/) { + auto logoBlobId = id.GetLogoBlobId(); + TGenStep genStep{logoBlobId.Generation(), logoBlobId.Step()}; + return genStep <= newCollectGenStep && 
id.GetLogoBlobId().TabletID() == (ui64)SelfTabletId; + }; + TTabletsByBlob extractedSelf = BlobsToDelete.ExtractBlobs(predSelf); + extractedToRemoveFromDB.Add(extractedSelf); + TTabletId tabletId; + TUnifiedBlobId unifiedBlobId; + while (extractedSelf.ExtractFront(tabletId, unifiedBlobId)) { + auto logoBlobId = unifiedBlobId.GetLogoBlobId(); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("to_delete_gc", logoBlobId); + NBlobOperations::NBlobStorage::TGCTask::TGCLists& gl = perGroupGCListsInFlight[unifiedBlobId.GetDsGroup()]; + bool skipDontKeep = false; + if (gl.KeepList.erase(logoBlobId)) { + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("to_keep_gc_remove", logoBlobId); + // Skipped blobs still need to be deleted from BlobsToKeep table + if (CurrentGen == logoBlobId.Generation()) { + // If this blob was created and deleted in the current generation then + // we can skip sending both Keep and DontKeep flags. + // NOTE: its not safe to do this for older generations because there is + // a scenario when Keep flag was sent in the old generation and then tablet restarted + // before getting the result and removing the blob from the Keep list. 
+ skipDontKeep = true; + ++CountersUpdate.BlobSkippedEntries; + } + } + if (!skipDontKeep) { + BlobsManagerCounters.OnCollectDropExplicit(logoBlobId.BlobSize()); + gl.DontKeepList.insert(logoBlobId); + } else { + BlobsManagerCounters.OnCollectDropImplicit(logoBlobId.BlobSize()); + } } + BlobsManagerCounters.OnBlobsDelete(BlobsToDelete); } - BlobsToDelete.erase(BlobsToDelete.begin(), blobIt); - BlobsManagerCounters.OnBlobsDelete(BlobsToDelete); + CollectGenStepInFlight = newCollectGenStep; + if (extractedToRemoveFromDB.GetSize() + keepsToErase.size() > blobsGCCountLimit) { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "a lot of blobs to gc")("to_remove", extractedToRemoveFromDB.GetSize())("keeps_to_erase", keepsToErase.size())("limit", blobsGCCountLimit); + break; + } + } + if (CollectGenStepInFlight) { + PopGCBarriers(*CollectGenStepInFlight); + } else { + CollectGenStepInFlight = LastCollectedGenStep; } + auto removeCategories = sharedBlobsInfo->BuildRemoveCategories(std::move(extractedToRemoveFromDB)); - CollectGenStepInFlight = newCollectGenStep; - return std::make_shared(storageId, std::move(perGroupGCListsInFlight), newCollectGenStep, std::move(keepsToErase), std::move(deletesToErase), manager); + auto result = std::make_shared(storageId, std::move(perGroupGCListsInFlight), *CollectGenStepInFlight, + std::move(keepsToErase), manager, std::move(removeCategories), counters, TabletInfo->TabletID, CurrentGen); + if (result->IsEmpty()) { + CollectGenStepInFlight = {}; + return nullptr; + } + return result; } TBlobBatch TBlobManager::StartBlobBatch(ui32 channel) { @@ -329,8 +381,6 @@ void TBlobManager::DoSaveBlobBatch(TBlobBatch&& blobBatch, IBlobManagerDb& db) { // Add this batch to KeepQueue TGenStep edgeGenStep = EdgeGenStep(); for (auto&& blobId: blobBatch.BatchInfo->GetBlobIds()) { - Y_DEBUG_ABORT_UNLESS(blobId.IsDsBlob(), "Not a DS blob id: %s", blobId.ToStringNew().c_str()); - auto logoBlobId = blobId.GetLogoBlobId(); TGenStep 
genStep{logoBlobId.Generation(), logoBlobId.Step()}; @@ -346,47 +396,46 @@ void TBlobManager::DoSaveBlobBatch(TBlobBatch&& blobBatch, IBlobManagerDb& db) { blobBatch.BatchInfo->GenStepRef.Reset(); } -void TBlobManager::DeleteBlobOnExecute(const TUnifiedBlobId& blobId, IBlobManagerDb& db) { +void TBlobManager::DeleteBlobOnExecute(const TTabletId tabletId, const TUnifiedBlobId& blobId, IBlobManagerDb& db) { // Persist deletion intent - db.AddBlobToDelete(blobId); + db.AddBlobToDelete(blobId, tabletId); AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("to_delete_on_execute", blobId); } -void TBlobManager::DeleteBlobOnComplete(const TUnifiedBlobId& blobId) { - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("to_delete_on_complete", blobId); +void TBlobManager::DeleteBlobOnComplete(const TTabletId tabletId, const TUnifiedBlobId& blobId) { + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("to_delete_on_complete", blobId)("tablet_id_delete", (ui64)tabletId); ++CountersUpdate.BlobsDeleted; // Check if the deletion needs to be delayed until the blob is no longer // used by in-flight requests if (!IsBlobInUsage(blobId)) { LOG_S_DEBUG("BlobManager at tablet " << TabletInfo->TabletID << " Delete Blob " << blobId); - TLogoBlobID logoBlobId = blobId.GetLogoBlobId(); - if (BlobsToDelete.emplace(logoBlobId).second) { - BlobsManagerCounters.OnDeleteBlobMarker(blobId.BlobSize()); - BlobsManagerCounters.OnBlobsDelete(BlobsToDelete); - } + Y_UNUSED(BlobsToDelete.Add(tabletId, blobId)); + BlobsManagerCounters.OnDeleteBlobMarker(blobId.BlobSize()); + BlobsManagerCounters.OnBlobsDelete(BlobsToDelete); } else { BlobsManagerCounters.OnDeleteBlobDelayedMarker(blobId.BlobSize()); LOG_S_DEBUG("BlobManager at tablet " << TabletInfo->TabletID << " Delay Delete Blob " << blobId); - BlobsToDeleteDelayed.insert(blobId.GetLogoBlobId()); + BlobsToDeleteDelayed.Add(tabletId, blobId); } } -void TBlobManager::OnGCFinished(const TGenStep& genStep, IBlobManagerDb& db) { +void TBlobManager::OnGCFinishedOnExecute(const 
TGenStep& genStep, IBlobManagerDb& db) { + db.SaveLastGcBarrier(genStep); +} + +void TBlobManager::OnGCFinishedOnComplete(const TGenStep& genStep) { LastCollectedGenStep = genStep; - db.SaveLastGcBarrier(LastCollectedGenStep); CollectGenStepInFlight.reset(); } void TBlobManager::OnBlobFree(const TUnifiedBlobId& blobId) { AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "blob_free")("blob_id", blobId); // Check if the blob is marked for delayed deletion - const TLogoBlobID logoBlobId = blobId.GetLogoBlobId(); - if (BlobsToDeleteDelayed.erase(logoBlobId)) { + if (BlobsToDeleteDelayed.ExtractBlobTo(blobId, BlobsToDelete)) { AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("blob_id", blobId)("event", "blob_delayed_deleted"); - BlobsToDelete.insert(logoBlobId); BlobsManagerCounters.OnBlobsDelete(BlobsToDelete); - BlobsManagerCounters.OnDeleteBlobMarker(logoBlobId.BlobSize()); + BlobsManagerCounters.OnDeleteBlobMarker(blobId.GetLogoBlobId().BlobSize()); } } diff --git a/ydb/core/tx/columnshard/blob_manager.h b/ydb/core/tx/columnshard/blobs_action/bs/blob_manager.h similarity index 68% rename from ydb/core/tx/columnshard/blob_manager.h rename to ydb/core/tx/columnshard/blobs_action/bs/blob_manager.h index a769a417aab7..c1228ea97c76 100644 --- a/ydb/core/tx/columnshard/blob_manager.h +++ b/ydb/core/tx/columnshard/blobs_action/bs/blob_manager.h @@ -1,9 +1,10 @@ #pragma once -#include "blob.h" -#include "blobs_action/blob_manager_db.h" -#include "blobs_action/abstract/storage.h" -#include "counters/blobs_manager.h" +#include +#include +#include +#include +#include #include #include @@ -15,12 +16,8 @@ namespace NKikimr::NOlap::NBlobOperations::NBlobStorage { class TGCTask; } -namespace NKikimr::NColumnShard { +namespace NKikimr::NOlap { -using NOlap::TUnifiedBlobId; -using NOlap::TBlobRange; -using NOlap::TEvictedBlob; -using NOlap::EEvictState; using NKikimrTxColumnShard::TEvictMetadata; @@ -89,29 +86,8 @@ class IBlobManager { return DoSaveBlobBatch(std::move(blobBatch), db); } 
- virtual void DeleteBlobOnExecute(const TUnifiedBlobId& blobId, IBlobManagerDb& db) = 0; - virtual void DeleteBlobOnComplete(const TUnifiedBlobId& blobId) = 0; -}; - -// An interface for exporting and caching exported blobs out of ColumnShard index to external storages like S3. -// Just do not mix it with IBlobManager that use out storage model. -class IBlobExporter { -protected: - ~IBlobExporter() = default; - -public: - // Lazily export blob to external object store. Keep it available via blobId. - virtual bool ExportOneToOne(TEvictedBlob&& evict, const TEvictMetadata& meta, IBlobManagerDb& db) = 0; - virtual bool DropOneToOne(const TUnifiedBlobId& blobId, IBlobManagerDb& db) = 0; - virtual bool UpdateOneToOne(TEvictedBlob& evict, IBlobManagerDb& db, bool& dropped) = 0; - virtual bool EraseOneToOne(const TEvictedBlob& evict, IBlobManagerDb& db) = 0; - virtual bool LoadOneToOneExport(IBlobManagerDb& db, THashSet& droppedEvicting) = 0; - virtual TEvictedBlob GetEvicted(const TUnifiedBlobId& blob, TEvictMetadata& meta) = 0; - virtual TEvictedBlob GetDropped(const TUnifiedBlobId& blobId, TEvictMetadata& meta) = 0; - virtual void GetCleanupBlobs(THashMap>& tierBlobs, - const THashSet& allowList = {}) const = 0; - virtual void GetReexportBlobs(THashMap>& tierBlobs) const = 0; - virtual bool HasExternBlobs() const = 0; + virtual void DeleteBlobOnExecute(const TTabletId tabletId, const TUnifiedBlobId& blobId, IBlobManagerDb& db) = 0; + virtual void DeleteBlobOnComplete(const TTabletId tabletId, const TUnifiedBlobId& blobId) = 0; }; // A ref-counted object to keep track when GC barrier can be moved to some step. 
@@ -143,12 +119,13 @@ struct TBlobManagerCounters { }; // The implementation of BlobManager that hides all GC-related details -class TBlobManager : public IBlobManager, public NOlap::TCommonBlobsTracker { +class TBlobManager : public IBlobManager, public TCommonBlobsTracker { private: static constexpr size_t BLOB_COUNT_TO_TRIGGER_GC_DEFAULT = 1000; static constexpr ui64 GC_INTERVAL_SECONDS_DEFAULT = 60; private: + const TTabletId SelfTabletId; TIntrusivePtr TabletInfo; const ui32 CurrentGen; ui32 CurrentStep; @@ -158,10 +135,10 @@ class TBlobManager : public IBlobManager, public NOlap::TCommonBlobsTracker { // Lists of blobs that need Keep flag to be set TSet BlobsToKeep; // Lists of blobs that need DoNotKeep flag to be set - TSet BlobsToDelete; + TTabletsByBlob BlobsToDelete; // List of blobs that are marked for deletion but are still used by in-flight requests - TSet BlobsToDeleteDelayed; + TTabletsByBlob BlobsToDeleteDelayed; // Sorted queue of GenSteps that have in-flight BlobBatches TDeque AllocatedGenSteps; @@ -172,26 +149,34 @@ class TBlobManager : public IBlobManager, public NOlap::TCommonBlobsTracker { // The barrier in the current in-flight GC request(s) bool FirstGC = true; - const TBlobsManagerCounters BlobsManagerCounters = TBlobsManagerCounters("BlobsManager"); + const NColumnShard::TBlobsManagerCounters BlobsManagerCounters = NColumnShard::TBlobsManagerCounters("BlobsManager"); // Stores counter updates since last call to GetCountersUpdate() // Then the counters are reset and start accumulating new delta TBlobManagerCounters CountersUpdate; - ui64 PerGenerationCounter = 1; - TInstant PreviousGCTime; // Used for delaying next GC if there are too few blobs to collect virtual void DoSaveBlobBatch(TBlobBatch&& blobBatch, IBlobManagerDb& db) override; public: - TBlobManager(TIntrusivePtr tabletInfo, ui32 gen); + TBlobManager(TIntrusivePtr tabletInfo, const ui32 gen, const TTabletId selfTabletId); + + TTabletsByBlob GetBlobsToDeleteAll() const { + auto 
result = BlobsToDelete; + result.Add(BlobsToDeleteDelayed); + return result; + } virtual void OnBlobFree(const TUnifiedBlobId& blobId) override; - const TBlobsManagerCounters& GetCounters() const { + const NColumnShard::TBlobsManagerCounters& GetCounters() const { return BlobsManagerCounters; } + TTabletId GetSelfTabletId() const { + return SelfTabletId; + } + ui64 GetTabletId() const { return TabletInfo->TabletID; } @@ -203,12 +188,15 @@ class TBlobManager : public IBlobManager, public NOlap::TCommonBlobsTracker { void RegisterControls(NKikimr::TControlBoard& icb); // Loads the state at startup - bool LoadState(IBlobManagerDb& db); + bool LoadState(IBlobManagerDb& db, const TTabletId selfTabletId); // Prepares Keep/DontKeep lists and GC barrier - std::shared_ptr BuildGCTask(const TString& storageId, const std::shared_ptr& manager); + std::shared_ptr BuildGCTask(const TString& storageId, + const std::shared_ptr& manager, const std::shared_ptr& sharedBlobsInfo, + const std::shared_ptr& counters) noexcept; - void OnGCFinished(const TGenStep& genStep, IBlobManagerDb& db); + void OnGCFinishedOnExecute(const TGenStep& genStep, IBlobManagerDb& db); + void OnGCFinishedOnComplete(const TGenStep& genStep); TBlobManagerCounters GetCountersUpdate() { TBlobManagerCounters res = CountersUpdate; @@ -218,10 +206,12 @@ class TBlobManager : public IBlobManager, public NOlap::TCommonBlobsTracker { // Implementation of IBlobManager interface TBlobBatch StartBlobBatch(ui32 channel = BLOB_CHANNEL) override; - void DeleteBlobOnExecute(const TUnifiedBlobId& blobId, IBlobManagerDb& db) override; - void DeleteBlobOnComplete(const TUnifiedBlobId& blobId) override; + virtual void DeleteBlobOnExecute(const TTabletId tabletId, const TUnifiedBlobId& blobId, IBlobManagerDb& db) override; + virtual void DeleteBlobOnComplete(const TTabletId tabletId, const TUnifiedBlobId& blobId) override; private: - TGenStep FindNewGCBarrier(); + std::vector FindNewGCBarriers(); + void PopGCBarriers(const 
TGenStep gs); + void PopGCBarriers(const ui32 count); bool ExtractEvicted(TEvictedBlob& evict, TEvictMetadata& meta, bool fromDropped = false); diff --git a/ydb/core/tx/columnshard/blobs_action/bs/gc.cpp b/ydb/core/tx/columnshard/blobs_action/bs/gc.cpp index 7d6a7456701b..4ee6598a206d 100644 --- a/ydb/core/tx/columnshard/blobs_action/bs/gc.cpp +++ b/ydb/core/tx/columnshard/blobs_action/bs/gc.cpp @@ -2,99 +2,64 @@ #include "storage.h" #include #include -#include #include namespace NKikimr::NOlap::NBlobOperations::NBlobStorage { -void TGCTask::DoOnExecuteTxAfterCleaning(NColumnShard::TColumnShard& /*self*/, NColumnShard::TBlobManagerDb& dbBlobs) { - size_t numBlobs = 0; - - for (; KeepsToErase.size() && numBlobs < NColumnShard::TLimits::MAX_BLOBS_TO_DELETE; ++numBlobs) { - dbBlobs.EraseBlobToKeep(KeepsToErase.front()); - KeepsToErase.pop_front(); - } +void TGCTask::RemoveBlobIdFromDB(const TTabletId tabletId, const TUnifiedBlobId& blobId, TBlobManagerDb& dbBlobs) { + dbBlobs.EraseBlobToDelete(blobId, tabletId); +} - for (; DeletesToErase.size() && numBlobs < NColumnShard::TLimits::MAX_BLOBS_TO_DELETE; ++numBlobs) { - dbBlobs.EraseBlobToDelete(DeletesToErase.front()); - DeletesToErase.pop_front(); - } - if (KeepsToErase.empty() && DeletesToErase.empty()) { - Manager->OnGCFinished(CollectGenStepInFlight, dbBlobs); +void TGCTask::DoOnExecuteTxAfterCleaning(NColumnShard::TColumnShard& /*self*/, TBlobManagerDb& dbBlobs) { + for (auto&& i : KeepsToErase) { + dbBlobs.EraseBlobToKeep(i); } + Manager->OnGCFinishedOnExecute(CollectGenStepInFlight, dbBlobs); } -bool TGCTask::DoOnCompleteTxAfterCleaning(NColumnShard::TColumnShard& self, const std::shared_ptr& taskAction) { - if (KeepsToErase.size() || DeletesToErase.size()) { - TActorContext::AsActorContext().Send(self.SelfId(), std::make_unique(taskAction)); - return false; - } else { - return true; - } +bool TGCTask::DoOnCompleteTxAfterCleaning(NColumnShard::TColumnShard& /*self*/, const std::shared_ptr& /*taskAction*/) { + 
Manager->OnGCFinishedOnComplete(CollectGenStepInFlight); + return true; } -TGCTask::TGCTask(const TString& storageId, TGCListsByGroup&& listsByGroupId, const NColumnShard::TGenStep& collectGenStepInFlight, std::deque&& keepsToErase, std::deque&& deletesToErase, - const std::shared_ptr& manager) - : TBase(storageId) +TGCTask::TGCTask(const TString& storageId, TGCListsByGroup&& listsByGroupId, const TGenStep& collectGenStepInFlight, std::deque&& keepsToErase, + const std::shared_ptr& manager, TBlobsCategories&& blobsToRemove, const std::shared_ptr& counters, + const ui64 tabletId, const ui64 currentGen) + : TBase(storageId, std::move(blobsToRemove), counters) , ListsByGroupId(std::move(listsByGroupId)) , CollectGenStepInFlight(collectGenStepInFlight) + , TabletId(tabletId) + , CurrentGen(currentGen) , KeepsToErase(std::move(keepsToErase)) - , DeletesToErase(std::move(deletesToErase)) , Manager(manager) { } void TGCTask::OnGCResult(TEvBlobStorage::TEvCollectGarbageResult::TPtr ev) { AFL_VERIFY(ev->Get()->Status == NKikimrProto::OK)("status", ev->Get()->Status)("details", ev->Get()->ToString())("action_id", GetActionGuid()); - - // Find the group for this result - ui64 counterFromRequest = ev->Get()->PerGenerationCounter; - auto itCounter = CounterToGroupInFlight.find(counterFromRequest); - Y_ABORT_UNLESS(itCounter != CounterToGroupInFlight.end()); - const ui32 group = itCounter->second; - - auto itGroup = ListsByGroupId.find(group); + auto itGroup = ListsByGroupId.find(ev->Cookie); Y_ABORT_UNLESS(itGroup != ListsByGroupId.end()); - const auto& keepList = itGroup->second.KeepList; - const auto& dontKeepList = itGroup->second.DontKeepList; - - for (auto&& i : dontKeepList) { - Counters->OnReply(i.BlobSize()); - } - - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("actor", "OnGCResult")("keep_list", keepList.size())("dont_keep_list", dontKeepList.size()); - - for (const auto& blobId : keepList) { - KeepsToErase.emplace_back(TUnifiedBlobId(group, blobId)); - } - for (const 
auto& blobId : dontKeepList) { - DeletesToErase.emplace_back(TUnifiedBlobId(group, blobId)); - } - ListsByGroupId.erase(itGroup); - CounterToGroupInFlight.erase(itCounter); } -THashMap> TGCTask::BuildRequests(ui64& perGenerationCounter, const ui64 tabletId, const ui64 currentGen) { - const ui32 channelIdx = NColumnShard::IBlobManager::BLOB_CHANNEL; - // Make per group requests - THashMap> requests; - for (const auto& gl : ListsByGroupId) { - ui32 group = gl.first; - requests[group] = std::make_unique( - tabletId, currentGen, perGenerationCounter, - channelIdx, true, - std::get<0>(CollectGenStepInFlight), std::get<1>(CollectGenStepInFlight), - new TVector(gl.second.KeepList.begin(), gl.second.KeepList.end()), - new TVector(gl.second.DontKeepList.begin(), gl.second.DontKeepList.end()), - TInstant::Max(), true); - for (auto&& i : gl.second.DontKeepList) { - Counters->OnRequest(i.BlobSize()); - } - Y_ABORT_UNLESS(CounterToGroupInFlight.emplace(perGenerationCounter, group).second); - perGenerationCounter += requests[group]->PerGenerationCounterStepSize(); - } - return std::move(requests); +namespace { +static TAtomicCounter PerGenerationCounter = 1; +} + +std::unique_ptr TGCTask::BuildRequest(const ui64 groupId) const { + const ui32 channelIdx = IBlobManager::BLOB_CHANNEL; + auto it = ListsByGroupId.find(groupId); + AFL_VERIFY(it != ListsByGroupId.end()); + AFL_VERIFY(++it->second.RequestsCount < 10); + auto result = std::make_unique( + TabletId, CurrentGen, PerGenerationCounter.Val(), + channelIdx, true, + std::get<0>(CollectGenStepInFlight), std::get<1>(CollectGenStepInFlight), + new TVector(it->second.KeepList.begin(), it->second.KeepList.end()), + new TVector(it->second.DontKeepList.begin(), it->second.DontKeepList.end()), + TInstant::Max(), true); + result->PerGenerationCounter = PerGenerationCounter.Add(result->PerGenerationCounterStepSize()); + return std::move(result); } } diff --git a/ydb/core/tx/columnshard/blobs_action/bs/gc.h 
b/ydb/core/tx/columnshard/blobs_action/bs/gc.h index c8ee866acb63..86f348fe9fcd 100644 --- a/ydb/core/tx/columnshard/blobs_action/bs/gc.h +++ b/ydb/core/tx/columnshard/blobs_action/bs/gc.h @@ -1,9 +1,10 @@ #pragma once +#include "blob_manager.h" + #include #include #include -#include namespace NKikimr::NOlap::NBlobOperations::NBlobStorage { @@ -14,31 +15,31 @@ class TGCTask: public IBlobsGCAction { struct TGCLists { THashSet KeepList; THashSet DontKeepList; + mutable ui32 RequestsCount = 0; }; using TGCListsByGroup = THashMap; private: TGCListsByGroup ListsByGroupId; - NColumnShard::TGenStep CollectGenStepInFlight; - // Maps PerGenerationCounter value to the group in PerGroupGCListsInFlight - THashMap CounterToGroupInFlight; + TGenStep CollectGenStepInFlight; + const ui64 TabletId; + const ui64 CurrentGen; std::deque KeepsToErase; - std::deque DeletesToErase; - std::shared_ptr Manager; - std::shared_ptr Counters; + std::shared_ptr Manager; protected: - virtual void DoOnExecuteTxAfterCleaning(NColumnShard::TColumnShard& self, NColumnShard::TBlobManagerDb& dbBlobs) override; + virtual void RemoveBlobIdFromDB(const TTabletId tabletId, const TUnifiedBlobId& blobId, TBlobManagerDb& dbBlobs) override; + virtual void DoOnExecuteTxAfterCleaning(NColumnShard::TColumnShard& self, TBlobManagerDb& dbBlobs) override; virtual bool DoOnCompleteTxAfterCleaning(NColumnShard::TColumnShard& self, const std::shared_ptr& taskAction) override; -public: - bool IsEmpty() const { - return ListsByGroupId.empty(); + virtual bool DoIsEmpty() const override { + return false; } - void SetCounters(const std::shared_ptr& counters) { - Counters = counters; - } +public: + TGCTask(const TString& storageId, TGCListsByGroup&& listsByGroupId, const TGenStep& collectGenStepInFlight, std::deque&& keepsToErase, + const std::shared_ptr& manager, TBlobsCategories&& blobsToRemove, const std::shared_ptr& counters, const ui64 tabletId, const ui64 currentGen); - TGCTask(const TString& storageId, 
TGCListsByGroup&& listsByGroupId, const NColumnShard::TGenStep& collectGenStepInFlight, std::deque&& keepsToErase, std::deque&& deletesToErase, - const std::shared_ptr& manager); + const TGCListsByGroup& GetListsByGroupId() const { + return ListsByGroupId; + } bool IsFinished() const { return ListsByGroupId.empty(); @@ -46,7 +47,7 @@ class TGCTask: public IBlobsGCAction { void OnGCResult(TEvBlobStorage::TEvCollectGarbageResult::TPtr ev); - THashMap> BuildRequests(ui64& perGenerationCounter, const ui64 tabletId, const ui64 currentGen); + std::unique_ptr BuildRequest(const ui64 groupId) const; }; } diff --git a/ydb/core/tx/columnshard/blobs_action/bs/gc_actor.cpp b/ydb/core/tx/columnshard/blobs_action/bs/gc_actor.cpp index e0fd4d29f277..8c2e792024ee 100644 --- a/ydb/core/tx/columnshard/blobs_action/bs/gc_actor.cpp +++ b/ydb/core/tx/columnshard/blobs_action/bs/gc_actor.cpp @@ -9,9 +9,17 @@ void TGarbageCollectionActor::Handle(TEvBlobStorage::TEvCollectGarbageResult::TP auto g = PassAwayGuard(); ACFL_WARN("event", "blocked_gc_event"); return; + } else if (ev->Get()->Status == NKikimrProto::OK) { + GCTask->OnGCResult(ev); + CheckFinished(); + } else { + ACFL_ERROR()("event", "GC_ERROR")("details", ev->Get()->Print(true)); + SendToBSProxy(NActors::TActivationContext::AsActorContext(), ev->Cookie, GCTask->BuildRequest(ev->Cookie).release(), ev->Cookie); } - GCTask->OnGCResult(ev); - if (GCTask->IsFinished()) { +} + +void TGarbageCollectionActor::CheckFinished() { + if (SharedRemovingFinished && GCTask->IsFinished()) { auto g = PassAwayGuard(); ACFL_DEBUG("actor", "TGarbageCollectionActor")("event", "finished"); TActorContext::AsActorContext().Send(TabletActorId, std::make_unique(GCTask)); diff --git a/ydb/core/tx/columnshard/blobs_action/bs/gc_actor.h b/ydb/core/tx/columnshard/blobs_action/bs/gc_actor.h index bcd5dfdbe06c..b2ebcfd35368 100644 --- a/ydb/core/tx/columnshard/blobs_action/bs/gc_actor.h +++ b/ydb/core/tx/columnshard/blobs_action/bs/gc_actor.h @@ -1,22 +1,27 @@ 
#pragma once #include "gc.h" -#include +#include #include #include namespace NKikimr::NOlap::NBlobOperations::NBlobStorage { -class TGarbageCollectionActor: public TActorBootstrapped { +class TGarbageCollectionActor: public TSharedBlobsCollectionActor { private: + using TBase = TSharedBlobsCollectionActor; const NActors::TActorId TabletActorId; - THashMap> Requests; std::shared_ptr GCTask; void Handle(TEvBlobStorage::TEvCollectGarbageResult::TPtr& ev); + void CheckFinished(); + + virtual void DoOnSharedRemovingFinished() override { + CheckFinished(); + } public: - TGarbageCollectionActor(const std::shared_ptr& task, THashMap>&& requests, const NActors::TActorId& tabletActorId) - : TabletActorId(tabletActorId) - , Requests(std::move(requests)) + TGarbageCollectionActor(const std::shared_ptr& task, const NActors::TActorId& tabletActorId, const TTabletId selfTabletId) + : TBase(task->GetStorageId(), selfTabletId, task->GetBlobsToRemove().GetBorrowed()) + , TabletActorId(tabletActorId) , GCTask(task) { @@ -26,14 +31,19 @@ class TGarbageCollectionActor: public TActorBootstrappedGetActionGuid()); switch (ev->GetTypeRewrite()) { hFunc(TEvBlobStorage::TEvCollectGarbageResult, Handle); + default: + TBase::StateWork(ev); } } void Bootstrap(const TActorContext& ctx) { AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("actor", "TGarbageCollectionActor")("event", "starting")("action_id", GCTask->GetActionGuid()); - for (auto&& i : Requests) { - SendToBSProxy(ctx, i.first, i.second.release()); + for (auto&& i : GCTask->GetListsByGroupId()) { + auto request = GCTask->BuildRequest(i.first); + AFL_VERIFY(request); + SendToBSProxy(ctx, i.first, request.release(), i.first); } + TBase::Bootstrap(ctx); Become(&TGarbageCollectionActor::StateWork); } }; diff --git a/ydb/core/tx/columnshard/blobs_action/bs/read.cpp b/ydb/core/tx/columnshard/blobs_action/bs/read.cpp index 89852a31f4d3..93d6e45200c4 100644 --- a/ydb/core/tx/columnshard/blobs_action/bs/read.cpp +++ 
b/ydb/core/tx/columnshard/blobs_action/bs/read.cpp @@ -1,5 +1,14 @@ #include "read.h" +#include +#include namespace NKikimr::NOlap::NBlobOperations::NBlobStorage { +void TReadingAction::DoStartReading(THashSet&& ranges) { + NBlobCache::TReadBlobRangeOptions readOpts{.CacheAfterRead = true, .IsBackgroud = GetIsBackgroundProcess(), .WithDeadline = false}; + std::vector rangesLocal(ranges.begin(), ranges.end()); + TActorContext::AsActorContext().Send(BlobCacheActorId, new NBlobCache::TEvBlobCache::TEvReadBlobRangeBatch(std::move(rangesLocal), std::move(readOpts))); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("blob_ids", JoinSeq(",", ranges))("count", ranges.size()); +} + } diff --git a/ydb/core/tx/columnshard/blobs_action/bs/read.h b/ydb/core/tx/columnshard/blobs_action/bs/read.h index e21c66866a05..1bd6bbc03dfc 100644 --- a/ydb/core/tx/columnshard/blobs_action/bs/read.h +++ b/ydb/core/tx/columnshard/blobs_action/bs/read.h @@ -1,8 +1,6 @@ #pragma once #include -#include -#include namespace NKikimr::NOlap::NBlobOperations::NBlobStorage { @@ -11,13 +9,9 @@ class TReadingAction: public IBlobsReadingAction { using TBase = IBlobsReadingAction; const TActorId BlobCacheActorId; protected: - virtual void DoStartReading(const THashMap>& ranges) override { - for (auto&& i : ranges) { - NBlobCache::TReadBlobRangeOptions readOpts{.CacheAfterRead = true, .IsBackgroud = GetIsBackgroundProcess(), .WithDeadline = false}; - std::vector rangesLocal(i.second.begin(), i.second.end()); - TActorContext::AsActorContext().Send(BlobCacheActorId, new NBlobCache::TEvBlobCache::TEvReadBlobRangeBatch(std::move(rangesLocal), std::move(readOpts))); - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("blob_id", i.first)("count", i.second.size()); - } + virtual void DoStartReading(THashSet&& ranges) override; + virtual THashMap> GroupBlobsForOptimization(std::vector&& ranges) const override { + return TBlobsGlueing::GroupRanges(std::move(ranges), TBlobsGlueing::TSequentialGluePolicy()); } public: diff 
--git a/ydb/core/tx/columnshard/blobs_action/bs/remove.h b/ydb/core/tx/columnshard/blobs_action/bs/remove.h index 59c8bf38dcaf..4067b9c7e8cd 100644 --- a/ydb/core/tx/columnshard/blobs_action/bs/remove.h +++ b/ydb/core/tx/columnshard/blobs_action/bs/remove.h @@ -1,37 +1,36 @@ #pragma once +#include "blob_manager.h" #include -#include -#include namespace NKikimr::NOlap::NBlobOperations::NBlobStorage { class TDeclareRemovingAction: public IBlobsDeclareRemovingAction { private: using TBase = IBlobsDeclareRemovingAction; - NColumnShard::TBlobManager* Manager; + TBlobManager* Manager; protected: - virtual void DoDeclareRemove(const TUnifiedBlobId& /*blobId*/) { + virtual void DoDeclareRemove(const TTabletId /*tabletId*/, const TUnifiedBlobId& /*blobId*/) { } - virtual void DoOnExecuteTxAfterRemoving(NColumnShard::TColumnShard& /*self*/, NColumnShard::TBlobManagerDb& dbBlobs, const bool blobsWroteSuccessfully) { + virtual void DoOnExecuteTxAfterRemoving(TBlobManagerDb& dbBlobs, const bool blobsWroteSuccessfully) { if (blobsWroteSuccessfully) { - for (auto&& i : GetDeclaredBlobs()) { - Manager->DeleteBlobOnExecute(i, dbBlobs); + for (auto i = GetDeclaredBlobs().GetIterator(); i.IsValid(); ++i) { + Manager->DeleteBlobOnExecute(i.GetTabletId(), i.GetBlobId(), dbBlobs); } } } - virtual void DoOnCompleteTxAfterRemoving(NColumnShard::TColumnShard& /*self*/, const bool blobsWroteSuccessfully) { + virtual void DoOnCompleteTxAfterRemoving(const bool blobsWroteSuccessfully) { if (blobsWroteSuccessfully) { - for (auto&& i : GetDeclaredBlobs()) { - Manager->DeleteBlobOnComplete(i); + for (auto i = GetDeclaredBlobs().GetIterator(); i.IsValid(); ++i) { + Manager->DeleteBlobOnComplete(i.GetTabletId(), i.GetBlobId()); } } } public: - TDeclareRemovingAction(const TString& storageId, NColumnShard::TBlobManager& manager) - : TBase(storageId) + TDeclareRemovingAction(const TString& storageId, const std::shared_ptr& counters, TBlobManager& manager) + : TBase(storageId, 
manager.GetSelfTabletId(), counters) , Manager(&manager) { diff --git a/ydb/core/tx/columnshard/blobs_action/bs/storage.cpp b/ydb/core/tx/columnshard/blobs_action/bs/storage.cpp index 7b9b864b8bb7..80a9c356ec40 100644 --- a/ydb/core/tx/columnshard/blobs_action/bs/storage.cpp +++ b/ydb/core/tx/columnshard/blobs_action/bs/storage.cpp @@ -8,8 +8,8 @@ namespace NKikimr::NOlap::NBlobOperations::NBlobStorage { -std::shared_ptr TOperator::DoStartDeclareRemovingAction() { - return std::make_shared(GetStorageId(), *Manager); +std::shared_ptr TOperator::DoStartDeclareRemovingAction(const std::shared_ptr& counters) { + return std::make_shared(GetStorageId(), counters, *Manager); } std::shared_ptr TOperator::DoStartWritingAction() { @@ -21,22 +21,23 @@ std::shared_ptr TOperator::DoStartReadingAc } std::shared_ptr TOperator::DoStartGCAction(const std::shared_ptr& counters) const { - auto gcTask = Manager->BuildGCTask(GetStorageId(), Manager); - if (!gcTask || gcTask->IsEmpty()) { + auto gcTask = Manager->BuildGCTask(GetStorageId(), Manager, GetSharedBlobs(), counters); + if (!gcTask) { AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "StartGCSkipped"); return nullptr; + } else { + AFL_VERIFY(!gcTask->IsEmpty()); } - gcTask->SetCounters(counters); - auto requests = gcTask->BuildRequests(PerGenerationCounter, Manager->GetTabletId(), Manager->GetCurrentGen()); - AFL_VERIFY(requests.size()); - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "StartGC")("requests_count", requests.size()); - TActorContext::AsActorContext().Register(new TGarbageCollectionActor(gcTask, std::move(requests), TabletActorId)); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "StartGC")("requests_count", gcTask->GetListsByGroupId().size()); + TActorContext::AsActorContext().Register(new TGarbageCollectionActor(gcTask, TabletActorId, GetSelfTabletId())); return gcTask; } -TOperator::TOperator(const TString& storageId, const NActors::TActorId& tabletActorId, const TIntrusivePtr& tabletInfo, const 
ui64 generation) - : TBase(storageId) - , Manager(std::make_shared(tabletInfo, generation)) +TOperator::TOperator(const TString& storageId, + const NActors::TActorId& tabletActorId, const TIntrusivePtr& tabletInfo, + const ui64 generation, const std::shared_ptr& sharedBlobs) + : TBase(storageId, sharedBlobs) + , Manager(std::make_shared(tabletInfo, generation, sharedBlobs->GetSelfTabletId())) , BlobCacheActorId(NBlobCache::MakeBlobCacheServiceId()) , TabletActorId(tabletActorId) { diff --git a/ydb/core/tx/columnshard/blobs_action/bs/storage.h b/ydb/core/tx/columnshard/blobs_action/bs/storage.h index 9e39e6173f0e..230b703020b8 100644 --- a/ydb/core/tx/columnshard/blobs_action/bs/storage.h +++ b/ydb/core/tx/columnshard/blobs_action/bs/storage.h @@ -1,8 +1,8 @@ #pragma once +#include "blob_manager.h" #include #include -#include #include namespace NKikimr::NOlap::NBlobOperations::NBlobStorage { @@ -10,24 +10,30 @@ namespace NKikimr::NOlap::NBlobOperations::NBlobStorage { class TOperator: public IBlobsStorageOperator { private: using TBase = IBlobsStorageOperator; - std::shared_ptr Manager; + std::shared_ptr Manager; const TActorId BlobCacheActorId; mutable ui64 PerGenerationCounter = 1; const TActorId TabletActorId; protected: - virtual std::shared_ptr DoStartDeclareRemovingAction() override; + virtual std::shared_ptr DoStartDeclareRemovingAction(const std::shared_ptr& counters) override; virtual std::shared_ptr DoStartWritingAction() override; virtual std::shared_ptr DoStartReadingAction() override; virtual std::shared_ptr DoStartGCAction(const std::shared_ptr& counters) const override; - virtual bool DoLoad(NColumnShard::IBlobManagerDb& dbBlobs) override { - return Manager->LoadState(dbBlobs); + virtual bool DoLoad(IBlobManagerDb& dbBlobs) override { + return Manager->LoadState(dbBlobs, GetSelfTabletId()); } - virtual void DoOnTieringModified(const std::shared_ptr& /*tiers*/) override { + virtual void DoOnTieringModified(const std::shared_ptr& /*tiers*/) override { 
return; } public: - TOperator(const TString& storageId, const NActors::TActorId& tabletActorId, const TIntrusivePtr& tabletInfo, const ui64 generation); + TOperator(const TString& storageId, const NActors::TActorId& tabletActorId, + const TIntrusivePtr& tabletInfo, const ui64 generation, const std::shared_ptr& sharedBlobs); + + virtual TTabletsByBlob GetBlobsToDelete() const override { + return Manager->GetBlobsToDeleteAll(); + } + virtual std::shared_ptr GetBlobsTracker() const override { return Manager; } diff --git a/ydb/core/tx/columnshard/blobs_action/bs/write.cpp b/ydb/core/tx/columnshard/blobs_action/bs/write.cpp index cc3fd41bccd4..c72805d16b6c 100644 --- a/ydb/core/tx/columnshard/blobs_action/bs/write.cpp +++ b/ydb/core/tx/columnshard/blobs_action/bs/write.cpp @@ -3,7 +3,7 @@ namespace NKikimr::NOlap::NBlobOperations::NBlobStorage { -void TWriteAction::DoOnExecuteTxAfterWrite(NColumnShard::TColumnShard& self, NColumnShard::TBlobManagerDb& dbBlobs, const bool blobsWroteSuccessfully) { +void TWriteAction::DoOnExecuteTxAfterWrite(NColumnShard::TColumnShard& self, TBlobManagerDb& dbBlobs, const bool blobsWroteSuccessfully) { ui64 blobsWritten = BlobBatch.GetBlobCount(); ui64 bytesWritten = BlobBatch.GetTotalSize(); if (blobsWroteSuccessfully) { diff --git a/ydb/core/tx/columnshard/blobs_action/bs/write.h b/ydb/core/tx/columnshard/blobs_action/bs/write.h index d02f9b0dd7c6..049dbf1dd3c1 100644 --- a/ydb/core/tx/columnshard/blobs_action/bs/write.h +++ b/ydb/core/tx/columnshard/blobs_action/bs/write.h @@ -1,16 +1,15 @@ #pragma once +#include "blob_manager.h" #include -#include -#include namespace NKikimr::NOlap::NBlobOperations::NBlobStorage { class TWriteAction: public IBlobsWritingAction { private: using TBase = IBlobsWritingAction; - NColumnShard::TBlobBatch BlobBatch; - std::shared_ptr Manager; + TBlobBatch BlobBatch; + std::shared_ptr Manager; protected: virtual void DoSendWriteBlobRequest(const TString& data, const TUnifiedBlobId& blobId) override { return 
BlobBatch.SendWriteBlobRequest(data, blobId, TInstant::Max(), TActorContext::AsActorContext()); @@ -20,7 +19,7 @@ class TWriteAction: public IBlobsWritingAction { return BlobBatch.OnBlobWriteResult(blobId.GetLogoBlobId(), status); } - virtual void DoOnExecuteTxBeforeWrite(NColumnShard::TColumnShard& /*self*/, NColumnShard::TBlobManagerDb& /*dbBlobs*/) override { + virtual void DoOnExecuteTxBeforeWrite(NColumnShard::TColumnShard& /*self*/, TBlobManagerDb& /*dbBlobs*/) override { return; } @@ -28,7 +27,7 @@ class TWriteAction: public IBlobsWritingAction { return; } - virtual void DoOnExecuteTxAfterWrite(NColumnShard::TColumnShard& self, NColumnShard::TBlobManagerDb& dbBlobs, const bool blobsWroteSuccessfully) override; + virtual void DoOnExecuteTxAfterWrite(NColumnShard::TColumnShard& self, TBlobManagerDb& dbBlobs, const bool blobsWroteSuccessfully) override; virtual void DoOnCompleteTxAfterWrite(NColumnShard::TColumnShard& /*self*/, const bool /*blobsWroteSuccessfully*/) override { } @@ -41,7 +40,7 @@ class TWriteAction: public IBlobsWritingAction { return BlobBatch.AllocateNextBlobId(data); } - TWriteAction(const TString& storageId, const std::shared_ptr& manager) + TWriteAction(const TString& storageId, const std::shared_ptr& manager) : TBase(storageId) , BlobBatch(manager->StartBlobBatch()) , Manager(manager) diff --git a/ydb/core/tx/columnshard/blobs_action/bs/ya.make b/ydb/core/tx/columnshard/blobs_action/bs/ya.make index 5974c3731cee..96b89d123e9d 100644 --- a/ydb/core/tx/columnshard/blobs_action/bs/ya.make +++ b/ydb/core/tx/columnshard/blobs_action/bs/ya.make @@ -7,6 +7,7 @@ SRCS( read.cpp storage.cpp remove.cpp + blob_manager.cpp ) PEERDIR( @@ -14,6 +15,8 @@ PEERDIR( contrib/libs/apache/arrow ydb/core/tablet_flat ydb/core/tx/tiering + ydb/core/tx/columnshard/data_sharing/protos + ydb/core/tx/columnshard/blobs_action/abstract ) END() diff --git a/ydb/core/tx/columnshard/blobs_action/common/const.cpp b/ydb/core/tx/columnshard/blobs_action/common/const.cpp new 
file mode 100644 index 000000000000..0c63111e4497 --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/common/const.cpp @@ -0,0 +1,5 @@ +#include "const.h" + +namespace NKikimr::NOlap::NBlobOperations { + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/blobs_action/common/const.h b/ydb/core/tx/columnshard/blobs_action/common/const.h new file mode 100644 index 000000000000..8901620b2dd0 --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/common/const.h @@ -0,0 +1,12 @@ +#pragma once +#include + +namespace NKikimr::NOlap::NBlobOperations { + +class TGlobal { +public: + static const inline TString DefaultStorageId = "__DEFAULT"; + static const inline TString MemoryStorageId = "__MEMORY"; +}; + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/blobs_action/common/ya.make b/ydb/core/tx/columnshard/blobs_action/common/ya.make new file mode 100644 index 000000000000..0d6ae9574b16 --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/common/ya.make @@ -0,0 +1,10 @@ +LIBRARY() + +SRCS( + const.cpp +) + +PEERDIR( +) + +END() diff --git a/ydb/core/tx/columnshard/blobs_action/counters/storage.cpp b/ydb/core/tx/columnshard/blobs_action/counters/storage.cpp index 8d8d9733e40f..9fec504f7d38 100644 --- a/ydb/core/tx/columnshard/blobs_action/counters/storage.cpp +++ b/ydb/core/tx/columnshard/blobs_action/counters/storage.cpp @@ -1,4 +1,6 @@ #include "storage.h" +#include +#include namespace NKikimr::NOlap::NBlobOperations { @@ -6,10 +8,18 @@ TStorageCounters::TStorageCounters(const TString& storageId) : TBase("BlobStorages") { DeepSubGroup("StorageId", storageId); + Consumers.resize((ui32)EConsumer::COUNT); + for (auto&& i : GetEnumAllValues()) { + if (i == EConsumer::COUNT) { + continue; + } + Consumers[(ui32)i] = std::make_shared(::ToString(i), *this); + } } -std::shared_ptr TStorageCounters::GetConsumerCounter(const TString& consumerId) { - return std::make_shared(consumerId, *this); +std::shared_ptr 
TStorageCounters::GetConsumerCounter(const EConsumer consumer) { + AFL_VERIFY((ui32)consumer < Consumers.size()); + return Consumers[(ui32)consumer]; } TConsumerCounters::TConsumerCounters(const TString& consumerId, const TStorageCounters& parent) diff --git a/ydb/core/tx/columnshard/blobs_action/counters/storage.h b/ydb/core/tx/columnshard/blobs_action/counters/storage.h index 902766af1545..617c952c84c5 100644 --- a/ydb/core/tx/columnshard/blobs_action/counters/storage.h +++ b/ydb/core/tx/columnshard/blobs_action/counters/storage.h @@ -11,6 +11,25 @@ namespace NKikimr::NOlap::NBlobOperations { class TStorageCounters; +enum class EConsumer { + TTL = 0, + GENERAL_COMPACTION, + INDEXATION, + CLEANUP_TABLES, + CLEANUP_PORTIONS, + CLEANUP_INSERT_TABLE, + CLEANUP_SHARED_BLOBS, + EXPORT, + SCAN, + GC, + WRITING, + WRITING_BUFFER, + WRITING_OPERATOR, + NORMALIZER, + + COUNT +}; + class TConsumerCounters: public NColumnShard::TCommonCountersOwner { private: using TBase = NColumnShard::TCommonCountersOwner; @@ -25,10 +44,11 @@ class TConsumerCounters: public NColumnShard::TCommonCountersOwner { class TStorageCounters: public NColumnShard::TCommonCountersOwner { private: using TBase = NColumnShard::TCommonCountersOwner; + std::vector> Consumers; public: TStorageCounters(const TString& storageId); - std::shared_ptr GetConsumerCounter(const TString& consumerId); + std::shared_ptr GetConsumerCounter(const EConsumer consumer); }; diff --git a/ydb/core/tx/columnshard/blobs_action/counters/ya.make b/ydb/core/tx/columnshard/blobs_action/counters/ya.make index 34b9f0747546..4c1aef6d35b8 100644 --- a/ydb/core/tx/columnshard/blobs_action/counters/ya.make +++ b/ydb/core/tx/columnshard/blobs_action/counters/ya.make @@ -14,4 +14,6 @@ PEERDIR( ydb/core/tablet_flat ) +GENERATE_ENUM_SERIALIZATION(storage.h) + END() diff --git a/ydb/core/tx/columnshard/blobs_action/events/delete_blobs.cpp b/ydb/core/tx/columnshard/blobs_action/events/delete_blobs.cpp new file mode 100644 index 
000000000000..19bdbec65079 --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/events/delete_blobs.cpp @@ -0,0 +1,5 @@ +#include "delete_blobs.h" + +namespace NKikimr::NOlap::NBlobOperations::NEvents { + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/blobs_action/events/delete_blobs.h b/ydb/core/tx/columnshard/blobs_action/events/delete_blobs.h new file mode 100644 index 000000000000..50e37bf7fe0f --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/events/delete_blobs.h @@ -0,0 +1,32 @@ +#pragma once +#include +#include +#include +#include +#include + +namespace NKikimr::NOlap::NBlobOperations::NEvents { + +struct TEvDeleteSharedBlobs: public NActors::TEventPB { + TEvDeleteSharedBlobs() = default; + + TEvDeleteSharedBlobs(const NActors::TActorId sourceActorId, const ui64 sourceTabletId, const TString& storageId, const THashSet& blobIds) { + Record.SetStorageId(storageId); + Record.SetSourceTabletId(sourceTabletId); + NActors::ActorIdToProto(sourceActorId, Record.MutableSourceActorId()); + for (auto&& i : blobIds) { + *Record.AddBlobIds() = i.ToStringNew(); + } + } +}; + +struct TEvDeleteSharedBlobsFinished: public NActors::TEventPB { + TEvDeleteSharedBlobsFinished() = default; + TEvDeleteSharedBlobsFinished(const TTabletId tabletId) + { + Record.SetTabletId((ui64)tabletId); + } +}; + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/blobs_action/events/ya.make b/ydb/core/tx/columnshard/blobs_action/events/ya.make new file mode 100644 index 000000000000..e46f5f0120be --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/events/ya.make @@ -0,0 +1,13 @@ +LIBRARY() + +SRCS( + delete_blobs.cpp +) + +PEERDIR( + ydb/core/tx/columnshard/blobs_action/protos + ydb/library/actors/core + ydb/core/tx/datashard +) + +END() diff --git a/ydb/core/tx/columnshard/blobs_action/memory.h b/ydb/core/tx/columnshard/blobs_action/memory.h deleted file mode 100644 index 2eee28698811..000000000000 --- 
a/ydb/core/tx/columnshard/blobs_action/memory.h +++ /dev/null @@ -1,173 +0,0 @@ -#pragma once - -#include -#include -#include - -namespace NKikimr::NOlap { - -class TMemoryStorage { -private: - THashMap Data; - THashMap DataWriting; - THashSet DataForRemove; - TMutex Mutex; -public: - std::optional Read(const TUnifiedBlobId& id) { - TGuard g(Mutex); - auto it = Data.find(id); - if (it == Data.end()) { - return {}; - } else { - return it->second; - } - } - - void DeclareDataForRemove(const TUnifiedBlobId& id) { - TGuard g(Mutex); - DataForRemove.emplace(id); - } - - void StartWriting(const TUnifiedBlobId& id, const TString& data) { - TGuard g(Mutex); - Y_ABORT_UNLESS(DataWriting.emplace(id, data).second); - } - - void CommitWriting(const TUnifiedBlobId& id) { - TGuard g(Mutex); - auto it = DataWriting.find(id); - Y_ABORT_UNLESS(it != DataWriting.end()); - Y_ABORT_UNLESS(Data.emplace(id, it->second).second); - DataWriting.erase(it); - } - - TMemoryStorage() = default; -}; - -class TMemoryWriteAction: public IBlobsWritingAction { -private: - using TBase = IBlobsWritingAction; - const std::shared_ptr Storage; -protected: - virtual void DoSendWriteBlobRequest(const TString& data, const TUnifiedBlobId& blobId) override { - Storage->StartWriting(blobId, data); - TActorContext::AsActorContext().Send(TActorContext::AsActorContext().SelfID, std::make_unique( - NKikimrProto::EReplyStatus::OK, blobId.GetLogoBlobId(), TStorageStatusFlags(), 0, 0)); - } - - virtual void DoOnBlobWriteResult(const TUnifiedBlobId& blobId, const NKikimrProto::EReplyStatus status) override { - Y_ABORT_UNLESS(status == NKikimrProto::EReplyStatus::OK); - Storage->CommitWriting(blobId); - } - - virtual void DoOnExecuteTxBeforeWrite(NColumnShard::TColumnShard& /*self*/, NColumnShard::TBlobManagerDb& /*dbBlobs*/) override { - return; - } - - virtual void DoOnCompleteTxBeforeWrite(NColumnShard::TColumnShard& /*self*/) override { - return; - } - - virtual void 
DoOnExecuteTxAfterWrite(NColumnShard::TColumnShard& /*self*/, NColumnShard::TBlobManagerDb& /*dbBlobs*/, const bool /*blobsWroteSuccessfully*/) override { - - } - virtual void DoOnCompleteTxAfterWrite(NColumnShard::TColumnShard& /*self*/, const bool /*blobsWroteSuccessfully*/) override { - - } -public: - virtual bool NeedDraftTransaction() const override { - return true; - } - - virtual TUnifiedBlobId AllocateNextBlobId(const TString& /*data*/) override { - return TUnifiedBlobId(); -// return BlobBatch.AllocateNextBlobId(data); - } - - TMemoryWriteAction(const TString& storageId, const std::shared_ptr& storage) - : TBase(storageId) - , Storage(storage) - { - - } -}; - -class TMemoryDeclareRemovingAction: public IBlobsDeclareRemovingAction { -private: - using TBase = IBlobsDeclareRemovingAction; - const std::shared_ptr Storage; -protected: - virtual void DoDeclareRemove(const TUnifiedBlobId& /*blobId*/) { - - } - - virtual void DoOnExecuteTxAfterRemoving(NColumnShard::TColumnShard& /*self*/, NColumnShard::TBlobManagerDb& /*dbBlobs*/, const bool /*blobsWroteSuccessfully*/) { - for (auto&& i : GetDeclaredBlobs()) { - Storage->DeclareDataForRemove(i); - } - } - virtual void DoOnCompleteTxAfterRemoving(NColumnShard::TColumnShard& /*self*/, const bool /*blobsWroteSuccessfully*/) { - - } -public: - - TMemoryDeclareRemovingAction(const TString& storageId, const std::shared_ptr& storage) - : TBase(storageId) - , Storage(storage) { - - } -}; - -class TMemoryReadingAction: public IBlobsReadingAction { -private: - using TBase = IBlobsReadingAction; - const std::shared_ptr Storage; -protected: - virtual void DoStartReading(const THashMap>& ranges) override { - for (auto&& i : ranges) { - auto data = Storage->Read(i.first); - for (auto&& r : i.second) { - if (!data) { - TActorContext::AsActorContext().Send(TActorContext::AsActorContext().SelfID, - new NBlobCache::TEvBlobCache::TEvReadBlobRangeResult(r, NKikimrProto::EReplyStatus::NODATA, "")); - } else { - 
Y_ABORT_UNLESS(r.Offset + r.Size <= data->size()); - TActorContext::AsActorContext().Send(TActorContext::AsActorContext().SelfID, - new NBlobCache::TEvBlobCache::TEvReadBlobRangeResult(r, NKikimrProto::EReplyStatus::OK, data->substr(r.Offset, r.Size))); - } - } - } - } -public: - - TMemoryReadingAction(const TString& storageId, const std::shared_ptr& storage) - : TBase(storageId) - , Storage(storage) - { - - } -}; - -class TMemoryOperator: public IBlobsStorageOperator { -private: - using TBase = IBlobsStorageOperator; - std::shared_ptr Storage; -protected: - virtual std::shared_ptr DoStartDeclareRemovingAction() override { - return std::make_shared(GetStorageId(), Storage); - } - virtual std::shared_ptr DoStartWritingAction() override { - return std::make_shared(GetStorageId(), Storage); - } - virtual std::shared_ptr DoStartReadingAction() override { - return std::make_shared(GetStorageId(), Storage); - } -public: - TMemoryOperator(const TString& storageId) - : TBase(storageId) - { - Storage = std::make_shared(); - } -}; - -} diff --git a/ydb/core/tx/columnshard/blobs_action/protos/blobs.proto b/ydb/core/tx/columnshard/blobs_action/protos/blobs.proto new file mode 100644 index 000000000000..7c866b54c7d1 --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/protos/blobs.proto @@ -0,0 +1,19 @@ +package NKikimrColumnShardBlobOperationsProto; + +message TBlobTablets { + optional string BlobId = 1; + repeated uint64 TabletIds = 2; +} + +message TTabletsByBlob { + repeated TBlobTablets Blobs = 1; +} + +message TBlobTablet { + optional string BlobId = 1; + optional uint64 TabletId = 2; +} + +message TTabletByBlob { + repeated TBlobTablet Blobs = 1; +} diff --git a/ydb/core/tx/columnshard/blobs_action/protos/events.proto b/ydb/core/tx/columnshard/blobs_action/protos/events.proto new file mode 100644 index 000000000000..34ae1fc57e8b --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/protos/events.proto @@ -0,0 +1,14 @@ +import 
"ydb/library/actors/protos/actors.proto"; + +package NKikimrColumnShardBlobOperationsProto; + +message TEvDeleteSharedBlobs { + optional uint64 SourceTabletId = 1; + optional NActorsProto.TActorId SourceActorId = 2; + optional string StorageId = 3; + repeated string BlobIds = 4; +} + +message TEvDeleteSharedBlobsFinished { + optional uint64 TabletId = 1; +} diff --git a/ydb/core/tx/columnshard/blobs_action/protos/ya.make b/ydb/core/tx/columnshard/blobs_action/protos/ya.make new file mode 100644 index 000000000000..705fd373a847 --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/protos/ya.make @@ -0,0 +1,12 @@ +PROTO_LIBRARY() + +SRCS( + events.proto + blobs.proto +) + +PEERDIR( + ydb/library/actors/protos +) + +END() diff --git a/ydb/core/tx/columnshard/blobs_action/storages_manager/manager.cpp b/ydb/core/tx/columnshard/blobs_action/storages_manager/manager.cpp new file mode 100644 index 000000000000..4d162527e8e6 --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/storages_manager/manager.cpp @@ -0,0 +1,50 @@ +#include "manager.h" +#include +#include +#ifndef KIKIMR_DISABLE_S3_OPS +#include +#endif +#include +#include + +namespace NKikimr::NOlap { + +std::shared_ptr TStoragesManager::DoBuildOperator(const TString& storageId) { + if (storageId == TBase::DefaultStorageId) { + return std::make_shared(storageId, Shard.SelfId(), Shard.Info(), + Shard.Executor()->Generation(), SharedBlobsManager->GetStorageManagerGuarantee(storageId)); + } else if (storageId == TBase::MemoryStorageId) { +#ifndef KIKIMR_DISABLE_S3_OPS + { + static TMutex mutexLocal; + TGuard g(mutexLocal); + Singleton()->SetSecretKey("fakeSecret"); + } + return std::make_shared(storageId, Shard.SelfId(), std::make_shared("fakeBucket", "fakeSecret"), + SharedBlobsManager->GetStorageManagerGuarantee(storageId)); +#else + return nullptr; +#endif + } else if (!Shard.Tiers) { + return nullptr; + } else { +#ifndef KIKIMR_DISABLE_S3_OPS + return std::make_shared(storageId, Shard, 
SharedBlobsManager->GetStorageManagerGuarantee(storageId)); +#else + return nullptr; +#endif + } +} + +bool TStoragesManager::DoLoadIdempotency(NTable::TDatabase& database) { + return SharedBlobsManager->LoadIdempotency(database); +} + +TStoragesManager::TStoragesManager(NColumnShard::TColumnShard& shard) + : Shard(shard) + , SharedBlobsManager(std::make_shared((TTabletId)Shard.TabletID())) +{ + +} + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/blobs_action/storages_manager/manager.h b/ydb/core/tx/columnshard/blobs_action/storages_manager/manager.h new file mode 100644 index 000000000000..00828c89a713 --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/storages_manager/manager.h @@ -0,0 +1,31 @@ +#pragma once +#include +#include +#include + +namespace NKikimr::NColumnShard { +class TColumnShard; +} + +namespace NKikimr::NOlap { + +class TStoragesManager: public IStoragesManager { +private: + using TBase = IStoragesManager; + NColumnShard::TColumnShard& Shard; + std::shared_ptr SharedBlobsManager; +protected: + virtual std::shared_ptr DoBuildOperator(const TString& storageId) override; + virtual bool DoLoadIdempotency(NTable::TDatabase& database) override; + + virtual const std::shared_ptr& DoGetSharedBlobsManager() const override { + AFL_VERIFY(SharedBlobsManager); + return SharedBlobsManager; + } + +public: + TStoragesManager(NColumnShard::TColumnShard& shard); +}; + + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/blobs_action/storages_manager/ya.make b/ydb/core/tx/columnshard/blobs_action/storages_manager/ya.make new file mode 100644 index 000000000000..b79b6720608b --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/storages_manager/ya.make @@ -0,0 +1,22 @@ +LIBRARY() + +SRCS( + manager.cpp +) + +PEERDIR( + ydb/core/tx/columnshard/data_sharing/manager + ydb/core/tx/columnshard/blobs_action/bs +) + +IF (OS_WINDOWS) + CFLAGS( + -DKIKIMR_DISABLE_S3_OPS + ) +ELSE() + PEERDIR( + 
ydb/core/tx/columnshard/blobs_action/tier + ) +ENDIF() + +END() diff --git a/ydb/core/tx/columnshard/blobs_action/tier/adapter.cpp b/ydb/core/tx/columnshard/blobs_action/tier/adapter.cpp index 6cda6d152ec2..9d94dafe939d 100644 --- a/ydb/core/tx/columnshard/blobs_action/tier/adapter.cpp +++ b/ydb/core/tx/columnshard/blobs_action/tier/adapter.cpp @@ -16,9 +16,9 @@ std::unique_ptr TRepliesAdapter::RebuildReplyEvent(std::uni } if (ev->IsSuccess()) { AFL_VERIFY(!!ev->Body)("key", ev->Key)("interval_from", ev->GetReadInterval().first)("interval_to", ev->GetReadInterval().second); - return std::make_unique(bRange, NKikimrProto::EReplyStatus::OK, ev->Body); + return std::make_unique(bRange, NKikimrProto::EReplyStatus::OK, ev->Body, false, StorageId); } else { - return std::make_unique(bRange, NKikimrProto::EReplyStatus::ERROR, ""); + return std::make_unique(bRange, NKikimrProto::EReplyStatus::ERROR, TStringBuilder() << ev->Result, false, StorageId); } } @@ -28,9 +28,9 @@ std::unique_ptr TRepliesAdapter::RebuildReplyEvent(std::uni Y_ABORT_UNLESS(ev->Key); AFL_VERIFY(TLogoBlobID::Parse(logoBlobId, *ev->Key, error))("error", error)("str_blob_id", *ev->Key); if (ev->IsSuccess()) { - return std::make_unique(NKikimrProto::EReplyStatus::OK, logoBlobId, 0, Max(), 0); + return std::make_unique(NKikimrProto::EReplyStatus::OK, logoBlobId, 0, Max(), 0, StorageId); } else { - return std::make_unique(NKikimrProto::EReplyStatus::ERROR, logoBlobId, 0, Max(), 0); + return std::make_unique(NKikimrProto::EReplyStatus::ERROR, logoBlobId, 0, Max(), 0, StorageId); } } diff --git a/ydb/core/tx/columnshard/blobs_action/tier/adapter.h b/ydb/core/tx/columnshard/blobs_action/tier/adapter.h index 1c3e6af2e88e..4e146e019137 100644 --- a/ydb/core/tx/columnshard/blobs_action/tier/adapter.h +++ b/ydb/core/tx/columnshard/blobs_action/tier/adapter.h @@ -4,7 +4,15 @@ namespace NKikimr::NOlap::NBlobOperations::NTier { class TRepliesAdapter: public NWrappers::NExternalStorage::IReplyAdapter { +private: + 
const TString StorageId; public: + TRepliesAdapter(const TString& storageId) + : StorageId(storageId) + { + + } + virtual std::unique_ptr RebuildReplyEvent(std::unique_ptr&& ev) const override; virtual std::unique_ptr RebuildReplyEvent(std::unique_ptr&& ev) const override; virtual std::unique_ptr RebuildReplyEvent(std::unique_ptr&& ev) const override { diff --git a/ydb/core/tx/columnshard/blobs_action/tier/gc.cpp b/ydb/core/tx/columnshard/blobs_action/tier/gc.cpp index b44019f880ae..0d9c65f8ec82 100644 --- a/ydb/core/tx/columnshard/blobs_action/tier/gc.cpp +++ b/ydb/core/tx/columnshard/blobs_action/tier/gc.cpp @@ -5,33 +5,18 @@ namespace NKikimr::NOlap::NBlobOperations::NTier { -void TGCTask::DoOnExecuteTxAfterCleaning(NColumnShard::TColumnShard& /*self*/, NColumnShard::TBlobManagerDb& dbBlobs) { - size_t numBlobs = 0; - - for (; DraftBlobIds.size() && numBlobs < NColumnShard::TLimits::MAX_BLOBS_TO_DELETE; ++numBlobs) { - dbBlobs.RemoveTierDraftBlobId(GetStorageId(), DraftBlobIds.front()); - DraftBlobIds.pop_front(); - } +void TGCTask::RemoveBlobIdFromDB(const TTabletId tabletId, const TUnifiedBlobId& blobId, TBlobManagerDb& dbBlobs) { + dbBlobs.RemoveTierBlobToDelete(GetStorageId(), blobId, tabletId); +} - for (; DeleteBlobIds.size() && numBlobs < NColumnShard::TLimits::MAX_BLOBS_TO_DELETE; ++numBlobs) { - dbBlobs.RemoveTierBlobToDelete(GetStorageId(), DeleteBlobIds.front()); - DeleteBlobIds.pop_front(); +void TGCTask::DoOnExecuteTxAfterCleaning(NColumnShard::TColumnShard& /*self*/, TBlobManagerDb& dbBlobs) { + for (auto&& i : DraftBlobIds) { + dbBlobs.RemoveTierDraftBlobId(GetStorageId(), i); } } -bool TGCTask::DoOnCompleteTxAfterCleaning(NColumnShard::TColumnShard& self, const std::shared_ptr& taskAction) { - if (DraftBlobIds.size() || DeleteBlobIds.size()) { - TActorContext::AsActorContext().Send(self.SelfId(), std::make_unique(taskAction)); - return false; - } else { - for (auto&& i : DraftBlobIds) { - Counters->OnReply(i.BlobSize()); - } - for (auto&& i : 
DeleteBlobIds) { - Counters->OnReply(i.BlobSize()); - } - return true; - } +bool TGCTask::DoOnCompleteTxAfterCleaning(NColumnShard::TColumnShard& /*self*/, const std::shared_ptr& /*taskAction*/) { + return true; } } diff --git a/ydb/core/tx/columnshard/blobs_action/tier/gc.h b/ydb/core/tx/columnshard/blobs_action/tier/gc.h index cdae1449f2de..bb97d0350200 100644 --- a/ydb/core/tx/columnshard/blobs_action/tier/gc.h +++ b/ydb/core/tx/columnshard/blobs_action/tier/gc.h @@ -12,27 +12,24 @@ class TGCTask: public IBlobsGCAction { using TBase = IBlobsGCAction; private: YDB_READONLY_DEF(std::deque, DraftBlobIds); - YDB_READONLY_DEF(std::deque, DeleteBlobIds); YDB_READONLY_DEF(NWrappers::NExternalStorage::IExternalStorageOperator::TPtr, ExternalStorageOperator); - const std::shared_ptr Counters; protected: - virtual void DoOnExecuteTxAfterCleaning(NColumnShard::TColumnShard& self, NColumnShard::TBlobManagerDb& dbBlobs) override; + virtual void DoOnExecuteTxAfterCleaning(NColumnShard::TColumnShard& self, TBlobManagerDb& dbBlobs) override; virtual bool DoOnCompleteTxAfterCleaning(NColumnShard::TColumnShard& self, const std::shared_ptr& taskAction) override; + virtual void RemoveBlobIdFromDB(const TTabletId tabletId, const TUnifiedBlobId& blobId, TBlobManagerDb& dbBlobs) override; + virtual bool DoIsEmpty() const override { + return DraftBlobIds.empty(); + } public: - TGCTask(const TString& storageId, std::deque&& draftBlobIds, std::deque&& deleteBlobIds, - const NWrappers::NExternalStorage::IExternalStorageOperator::TPtr& externalStorageOperator, const std::shared_ptr& counters) - : TBase(storageId) + TGCTask(const TString& storageId, std::deque&& draftBlobIds, const NWrappers::NExternalStorage::IExternalStorageOperator::TPtr& externalStorageOperator, + TBlobsCategories&& blobsToRemove, const std::shared_ptr& counters) + : TBase(storageId, std::move(blobsToRemove), counters) , DraftBlobIds(std::move(draftBlobIds)) - , DeleteBlobIds(std::move(deleteBlobIds)) , 
ExternalStorageOperator(externalStorageOperator) - , Counters(counters) { for (auto&& i : DraftBlobIds) { Counters->OnRequest(i.BlobSize()); } - for (auto&& i : DeleteBlobIds) { - Counters->OnRequest(i.BlobSize()); - } } }; diff --git a/ydb/core/tx/columnshard/blobs_action/tier/gc_actor.cpp b/ydb/core/tx/columnshard/blobs_action/tier/gc_actor.cpp index 0fce2762142f..2c91679895fd 100644 --- a/ydb/core/tx/columnshard/blobs_action/tier/gc_actor.cpp +++ b/ydb/core/tx/columnshard/blobs_action/tier/gc_actor.cpp @@ -9,19 +9,15 @@ void TGarbageCollectionActor::Handle(NWrappers::NExternalStorage::TEvDeleteObjec Y_ABORT_UNLESS(ev->Get()->Key); AFL_VERIFY(TLogoBlobID::Parse(logoBlobId, *ev->Get()->Key, errorMessage))("error", errorMessage); BlobIdsToRemove.erase(logoBlobId); - if (BlobIdsToRemove.empty()) { - auto g = PassAwayGuard(); - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("actor", "TGarbageCollectionActor")("event", "finished"); - TActorContext::AsActorContext().Send(TabletActorId, std::make_unique(GCTask)); - } + CheckFinished(); } -void TGarbageCollectionActor::Bootstrap(const TActorContext& /*ctx*/) { - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("actor", "TGarbageCollectionActor")("event", "starting"); - for (auto&& i : GCTask->GetDraftBlobIds()) { - BlobIdsToRemove.emplace(i.GetLogoBlobId()); +void TGarbageCollectionActor::Bootstrap(const TActorContext& ctx) { + for (auto i = GCTask->GetBlobsToRemove().GetDirect().GetIterator(); i.IsValid(); ++i) { + BlobIdsToRemove.emplace(i.GetBlobId().GetLogoBlobId()); } - for (auto&& i : GCTask->GetDeleteBlobIds()) { + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("actor", "TGarbageCollectionActor")("event", "starting")("storage_id", GCTask->GetStorageId())("drafts", GCTask->GetDraftBlobIds().size())("to_delete", BlobIdsToRemove.size()); + for (auto&& i : GCTask->GetDraftBlobIds()) { BlobIdsToRemove.emplace(i.GetLogoBlobId()); } for (auto&& i : BlobIdsToRemove) { @@ -31,7 +27,16 @@ void TGarbageCollectionActor::Bootstrap(const 
TActorContext& /*ctx*/) { TAutoPtr> evPtr((TEventHandle*)hRequest.release()); GCTask->GetExternalStorageOperator()->Execute(evPtr); } + TBase::Bootstrap(ctx); Become(&TGarbageCollectionActor::StateWork); } +void TGarbageCollectionActor::CheckFinished() { + if (SharedRemovingFinished && BlobIdsToRemove.empty()) { + auto g = PassAwayGuard(); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("actor", "TGarbageCollectionActor")("event", "finished"); + TActorContext::AsActorContext().Send(TabletActorId, std::make_unique(GCTask)); + } +} + } diff --git a/ydb/core/tx/columnshard/blobs_action/tier/gc_actor.h b/ydb/core/tx/columnshard/blobs_action/tier/gc_actor.h index 448c07899652..78ee58e9152d 100644 --- a/ydb/core/tx/columnshard/blobs_action/tier/gc_actor.h +++ b/ydb/core/tx/columnshard/blobs_action/tier/gc_actor.h @@ -1,22 +1,30 @@ #pragma once #include "gc.h" -#include +#include #include +#include #include namespace NKikimr::NOlap::NBlobOperations::NTier { -class TGarbageCollectionActor: public TActorBootstrapped { +class TGarbageCollectionActor: public TSharedBlobsCollectionActor { private: + using TBase = TSharedBlobsCollectionActor; const NActors::TActorId TabletActorId; std::shared_ptr GCTask; THashSet BlobIdsToRemove; void Handle(NWrappers::NExternalStorage::TEvDeleteObjectResponse::TPtr& ev); + void CheckFinished(); + + virtual void DoOnSharedRemovingFinished() override { + CheckFinished(); + } public: - TGarbageCollectionActor(const std::shared_ptr& task, const NActors::TActorId& tabletActorId) - : TabletActorId(tabletActorId) + TGarbageCollectionActor(const std::shared_ptr& task, const NActors::TActorId& tabletActorId, const TTabletId selfTabletId) + : TBase(task->GetStorageId(), selfTabletId, task->GetBlobsToRemove().GetBorrowed()) + , TabletActorId(tabletActorId) , GCTask(task) { @@ -25,6 +33,8 @@ class TGarbageCollectionActor: public TActorBootstrappedGetTypeRewrite()) { hFunc(NWrappers::NExternalStorage::TEvDeleteObjectResponse, Handle); + default: + 
TBase::StateWork(ev); } } diff --git a/ydb/core/tx/columnshard/blobs_action/tier/gc_info.h b/ydb/core/tx/columnshard/blobs_action/tier/gc_info.h index 8814ccabe2a7..2aad8beb5c03 100644 --- a/ydb/core/tx/columnshard/blobs_action/tier/gc_info.h +++ b/ydb/core/tx/columnshard/blobs_action/tier/gc_info.h @@ -7,28 +7,30 @@ namespace NKikimr::NOlap::NBlobOperations::NTier { class TGCInfo: public TCommonBlobsTracker { private: - YDB_ACCESSOR_DEF(std::deque, BlobsToDelete); + YDB_ACCESSOR_DEF(TTabletsByBlob, BlobsToDelete); YDB_ACCESSOR_DEF(std::deque, DraftBlobIdsToRemove); - YDB_ACCESSOR_DEF(THashSet, BlobsToDeleteInFuture); + YDB_ACCESSOR_DEF(TTabletsByBlob, BlobsToDeleteInFuture); public: virtual void OnBlobFree(const TUnifiedBlobId& blobId) override { - if (BlobsToDeleteInFuture.erase(blobId)) { - BlobsToDelete.emplace_back(blobId); - } + BlobsToDeleteInFuture.ExtractBlobTo(blobId, BlobsToDelete); } - bool ExtractForGC(std::deque& deleteDraftBlobIds, std::deque& deleteBlobIds, const ui32 blobsCountLimit) { - if (DraftBlobIdsToRemove.empty() && BlobsToDelete.empty()) { + bool ExtractForGC(std::deque& deleteDraftBlobIds, TTabletsByBlob& deleteBlobIds, const ui32 blobsCountLimit) { + if (DraftBlobIdsToRemove.empty() && BlobsToDelete.IsEmpty()) { + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "extract_for_gc_skip")("reason", "no_data"); return false; } - while (DraftBlobIdsToRemove.size() && deleteBlobIds.size() + deleteDraftBlobIds.size() < blobsCountLimit) { + ui32 count = 0; + TTabletsByBlob deleteBlobIdsLocal; + while (DraftBlobIdsToRemove.size() && count < blobsCountLimit) { deleteDraftBlobIds.emplace_back(DraftBlobIdsToRemove.front()); DraftBlobIdsToRemove.pop_front(); + ++count; } - while (BlobsToDelete.size() && deleteBlobIds.size() + deleteDraftBlobIds.size() < blobsCountLimit) { - deleteBlobIds.emplace_back(BlobsToDelete.front()); - BlobsToDelete.pop_front(); + while (BlobsToDelete.ExtractFrontTo(deleteBlobIdsLocal) && count < blobsCountLimit) { + ++count; 
} + std::swap(deleteBlobIdsLocal, deleteBlobIds); return true; } }; diff --git a/ydb/core/tx/columnshard/blobs_action/tier/read.cpp b/ydb/core/tx/columnshard/blobs_action/tier/read.cpp index 754dd289d730..7af0c28583af 100644 --- a/ydb/core/tx/columnshard/blobs_action/tier/read.cpp +++ b/ydb/core/tx/columnshard/blobs_action/tier/read.cpp @@ -2,17 +2,15 @@ namespace NKikimr::NOlap::NBlobOperations::NTier { -void TReadingAction::DoStartReading(const THashMap>& ranges) { - for (auto&& i : ranges) { - for (auto&& r : i.second) { - auto awsRequest = Aws::S3::Model::GetObjectRequest() - .WithKey(i.first.GetLogoBlobId().ToString()) - .WithRange(TStringBuilder() << "bytes=" << r.Offset << "-" << r.Offset + r.Size - 1); - auto request = std::make_unique(awsRequest); - auto hRequest = std::make_unique(NActors::TActorId(), TActorContext::AsActorContext().SelfID, request.release()); - TAutoPtr> evPtr((TEventHandle*)hRequest.release()); - ExternalStorageOperator->Execute(evPtr); - } +void TReadingAction::DoStartReading(THashSet&& ranges) { + for (auto&& r : ranges) { + auto awsRequest = Aws::S3::Model::GetObjectRequest() + .WithKey(r.BlobId.GetLogoBlobId().ToString()) + .WithRange(TStringBuilder() << "bytes=" << r.Offset << "-" << r.Offset + r.Size - 1); + auto request = std::make_unique(awsRequest); + auto hRequest = std::make_unique(NActors::TActorId(), TActorContext::AsActorContext().SelfID, request.release()); + TAutoPtr> evPtr((TEventHandle*)hRequest.release()); + ExternalStorageOperator->Execute(evPtr); } } diff --git a/ydb/core/tx/columnshard/blobs_action/tier/read.h b/ydb/core/tx/columnshard/blobs_action/tier/read.h index b9804d18b662..e4742cc285ee 100644 --- a/ydb/core/tx/columnshard/blobs_action/tier/read.h +++ b/ydb/core/tx/columnshard/blobs_action/tier/read.h @@ -10,7 +10,10 @@ class TReadingAction: public IBlobsReadingAction { using TBase = IBlobsReadingAction; const NWrappers::NExternalStorage::IExternalStorageOperator::TPtr ExternalStorageOperator; protected: - 
virtual void DoStartReading(const THashMap>& ranges) override; + virtual void DoStartReading(THashSet&& ranges) override; + virtual THashMap> GroupBlobsForOptimization(std::vector&& ranges) const override { + return TBlobsGlueing::GroupRanges(std::move(ranges), TBlobsGlueing::TBlobGluePolicy(8LLU << 20)); + } public: TReadingAction(const TString& storageId, const NWrappers::NExternalStorage::IExternalStorageOperator::TPtr& storageOperator) diff --git a/ydb/core/tx/columnshard/blobs_action/tier/remove.h b/ydb/core/tx/columnshard/blobs_action/tier/remove.h index 75924c6d5eba..9dbd6fb0be9f 100644 --- a/ydb/core/tx/columnshard/blobs_action/tier/remove.h +++ b/ydb/core/tx/columnshard/blobs_action/tier/remove.h @@ -1,7 +1,6 @@ #pragma once #include -#include #include #include "gc_info.h" @@ -12,31 +11,31 @@ class TDeclareRemovingAction: public IBlobsDeclareRemovingAction { using TBase = IBlobsDeclareRemovingAction; std::shared_ptr GCInfo; protected: - virtual void DoDeclareRemove(const TUnifiedBlobId& /*blobId*/) { + virtual void DoDeclareRemove(const TTabletId /*tabletId*/, const TUnifiedBlobId& /*blobId*/) { } - virtual void DoOnExecuteTxAfterRemoving(NColumnShard::TColumnShard& /*self*/, NColumnShard::TBlobManagerDb& dbBlobs, const bool blobsWroteSuccessfully) { + virtual void DoOnExecuteTxAfterRemoving(TBlobManagerDb& dbBlobs, const bool blobsWroteSuccessfully) { if (blobsWroteSuccessfully) { - for (auto&& i : GetDeclaredBlobs()) { - dbBlobs.AddTierBlobToDelete(GetStorageId(), i); + for (auto i = GetDeclaredBlobs().GetIterator(); i.IsValid(); ++i) { + dbBlobs.AddTierBlobToDelete(GetStorageId(), i.GetBlobId(), i.GetTabletId()); } } } - virtual void DoOnCompleteTxAfterRemoving(NColumnShard::TColumnShard& /*self*/, const bool blobsWroteSuccessfully) { + virtual void DoOnCompleteTxAfterRemoving(const bool blobsWroteSuccessfully) { if (blobsWroteSuccessfully) { for (auto&& i : GetDeclaredBlobs()) { - if (GCInfo->IsBlobInUsage(i)) { - 
Y_ABORT_UNLESS(GCInfo->MutableBlobsToDeleteInFuture().emplace(i).second); + if (GCInfo->IsBlobInUsage(i.first)) { + AFL_VERIFY(GCInfo->MutableBlobsToDeleteInFuture().Add(i.first, i.second)); } else { - GCInfo->MutableBlobsToDelete().emplace_back(i); + AFL_VERIFY(GCInfo->MutableBlobsToDelete().Add(i.first, i.second)); } } } } public: - TDeclareRemovingAction(const TString& storageId, const std::shared_ptr& gcInfo) - : TBase(storageId) + TDeclareRemovingAction(const TString& storageId, const TTabletId selfTabletId, const std::shared_ptr& counters, const std::shared_ptr& gcInfo) + : TBase(storageId, selfTabletId, counters) , GCInfo(gcInfo) { diff --git a/ydb/core/tx/columnshard/blobs_action/tier/storage.cpp b/ydb/core/tx/columnshard/blobs_action/tier/storage.cpp index 5c0db48f42a2..36d273111eff 100644 --- a/ydb/core/tx/columnshard/blobs_action/tier/storage.cpp +++ b/ydb/core/tx/columnshard/blobs_action/tier/storage.cpp @@ -15,12 +15,12 @@ NWrappers::NExternalStorage::IExternalStorageOperator::TPtr TOperator::GetCurren return ExternalStorageOperator; } -std::shared_ptr TOperator::DoStartDeclareRemovingAction() { - return std::make_shared(GetStorageId(), GCInfo); +std::shared_ptr TOperator::DoStartDeclareRemovingAction(const std::shared_ptr& counters) { + return std::make_shared(GetStorageId(), GetSelfTabletId(), counters, GCInfo); } std::shared_ptr TOperator::DoStartWritingAction() { - return std::make_shared(GetStorageId(), GetCurrentOperator(), TabletId, GCInfo); + return std::make_shared(GetStorageId(), GetCurrentOperator(), (ui64)GetSelfTabletId(), GCInfo); } std::shared_ptr TOperator::DoStartReadingAction() { @@ -29,12 +29,22 @@ std::shared_ptr TOperator::DoStartReadingAction() { std::shared_ptr TOperator::DoStartGCAction(const std::shared_ptr& counters) const { std::deque draftBlobIds; - std::deque deleteBlobIds; - if (!GCInfo->ExtractForGC(draftBlobIds, deleteBlobIds, 100000)) { + AFL_VERIFY(!!TabletActorId); + TBlobsCategories categories(TTabletId(0)); + { + 
TTabletsByBlob deleteBlobIds; + if (!GCInfo->ExtractForGC(draftBlobIds, deleteBlobIds, 100000)) { + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "start_gc_skipped")("reason", "cannot_extract"); + return nullptr; + } + categories = GetSharedBlobs()->BuildRemoveCategories(std::move(deleteBlobIds)); + } + auto gcTask = std::make_shared(GetStorageId(), std::move(draftBlobIds), GetCurrentOperator(), std::move(categories), counters); + if (gcTask->IsEmpty()) { + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "start_gc_skipped")("reason", "task_empty"); return nullptr; } - auto gcTask = std::make_shared(GetStorageId(), std::move(draftBlobIds), std::move(deleteBlobIds), GetCurrentOperator(), counters); - TActorContext::AsActorContext().Register(new TGarbageCollectionActor(gcTask, TabletActorId)); + TActorContext::AsActorContext().Register(new TGarbageCollectionActor(gcTask, TabletActorId, GetSelfTabletId())); return gcTask; } @@ -45,23 +55,46 @@ void TOperator::InitNewExternalOperator(const NColumnShard::NTiers::TManager* ti } else { settings.SetEndpoint("nowhere"); } + { + TGuard changeLock(ChangeOperatorLock); + if (CurrentS3Settings && CurrentS3Settings->SerializeAsString() == settings.SerializeAsString()) { + return; + } + } auto extStorageConfig = NWrappers::NExternalStorage::IExternalStorageConfig::Construct(settings); AFL_VERIFY(extStorageConfig); auto extStorageOperator = extStorageConfig->ConstructStorageOperator(false); - extStorageOperator->InitReplyAdapter(std::make_shared()); + extStorageOperator->InitReplyAdapter(std::make_shared(GetStorageId())); + TGuard changeLock(ChangeOperatorLock); + CurrentS3Settings = settings; + ExternalStorageOperator = extStorageOperator; +} + +void TOperator::InitNewExternalOperator() { + AFL_VERIFY(InitializationConfig); + auto extStorageOperator = InitializationConfig->ConstructStorageOperator(false); + extStorageOperator->InitReplyAdapter(std::make_shared(GetStorageId())); TGuard changeLock(ChangeOperatorLock); 
ExternalStorageOperator = extStorageOperator; } -TOperator::TOperator(const TString& storageId, const NColumnShard::TColumnShard& shard) - : TBase(storageId) - , TabletId(shard.TabletID()) +TOperator::TOperator(const TString& storageId, const NColumnShard::TColumnShard& shard, const std::shared_ptr& storageSharedBlobsManager) + : TBase(storageId, storageSharedBlobsManager) , TabletActorId(shard.SelfId()) { InitNewExternalOperator(shard.GetTierManagerPointer(storageId)); } -void TOperator::DoOnTieringModified(const std::shared_ptr& tiers) { +TOperator::TOperator(const TString& storageId, const TActorId& shardActorId, const std::shared_ptr& storageConfig, + const std::shared_ptr& storageSharedBlobsManager) + : TBase(storageId, storageSharedBlobsManager) + , TabletActorId(shardActorId) + , InitializationConfig(storageConfig) +{ + InitNewExternalOperator(); +} + +void TOperator::DoOnTieringModified(const std::shared_ptr& tiers) { auto* tierManager = tiers->GetManagerOptional(TBase::GetStorageId()); if (tierManager) { InitNewExternalOperator(tierManager); @@ -71,4 +104,15 @@ void TOperator::DoOnTieringModified(const std::shared_ptr draftBlobIdsToRemove; + if (!dbBlobs.LoadTierLists(GetStorageId(), blobsToDelete, draftBlobIdsToRemove, GetSelfTabletId())) { + return false; + } + GCInfo->MutableBlobsToDelete() = std::move(blobsToDelete); + GCInfo->MutableDraftBlobIdsToRemove() = std::move(draftBlobIdsToRemove); + return true; +} + } diff --git a/ydb/core/tx/columnshard/blobs_action/tier/storage.h b/ydb/core/tx/columnshard/blobs_action/tier/storage.h index fd2cd725ac81..f149faa4a1ad 100644 --- a/ydb/core/tx/columnshard/blobs_action/tier/storage.h +++ b/ydb/core/tx/columnshard/blobs_action/tier/storage.h @@ -1,7 +1,6 @@ #pragma once #include -#include #include #include #include @@ -12,33 +11,39 @@ namespace NKikimr::NOlap::NBlobOperations::NTier { class TOperator: public IBlobsStorageOperator { private: using TBase = IBlobsStorageOperator; - const ui64 TabletId; const 
NActors::TActorId TabletActorId; std::shared_ptr GCInfo = std::make_shared(); - + std::optional CurrentS3Settings; + NWrappers::NExternalStorage::IExternalStorageConfig::TPtr InitializationConfig; NWrappers::NExternalStorage::IExternalStorageConfig::TPtr ExternalStorageConfig; TSpinLock ChangeOperatorLock; NWrappers::NExternalStorage::IExternalStorageOperator::TPtr ExternalStorageOperator; NWrappers::NExternalStorage::IExternalStorageOperator::TPtr GetCurrentOperator() const; void InitNewExternalOperator(const NColumnShard::NTiers::TManager* tierManager); + void InitNewExternalOperator(); virtual TString DoDebugString() const override { return GetCurrentOperator()->DebugString(); } protected: - virtual std::shared_ptr DoStartDeclareRemovingAction() override; + virtual std::shared_ptr DoStartDeclareRemovingAction(const std::shared_ptr& counters) override; virtual std::shared_ptr DoStartWritingAction() override; virtual std::shared_ptr DoStartReadingAction() override; virtual std::shared_ptr DoStartGCAction(const std::shared_ptr& counters) const override; - virtual bool DoLoad(NColumnShard::IBlobManagerDb& dbBlobs) override { - dbBlobs.LoadTierLists(GetStorageId(), GCInfo->MutableBlobsToDelete(), GCInfo->MutableDraftBlobIdsToRemove()); - return true; - } - virtual void DoOnTieringModified(const std::shared_ptr& tiers) override; + virtual bool DoLoad(IBlobManagerDb& dbBlobs) override; + virtual void DoOnTieringModified(const std::shared_ptr& tiers) override; public: - TOperator(const TString& storageId, const NColumnShard::TColumnShard& shard); + TOperator(const TString& storageId, const NColumnShard::TColumnShard& shard, const std::shared_ptr& storageSharedBlobsManager); + TOperator(const TString& storageId, const TActorId& shardActorId, const std::shared_ptr& storageConfig, + const std::shared_ptr& storageSharedBlobsManager); + + virtual TTabletsByBlob GetBlobsToDelete() const override { + auto result = GCInfo->GetBlobsToDelete(); + 
result.Add(GCInfo->GetBlobsToDeleteInFuture()); + return result; + } virtual std::shared_ptr GetBlobsTracker() const override { return GCInfo; diff --git a/ydb/core/tx/columnshard/blobs_action/tier/write.cpp b/ydb/core/tx/columnshard/blobs_action/tier/write.cpp index 9620f8d37e55..5ae88eb577d2 100644 --- a/ydb/core/tx/columnshard/blobs_action/tier/write.cpp +++ b/ydb/core/tx/columnshard/blobs_action/tier/write.cpp @@ -15,20 +15,15 @@ void TWriteAction::DoSendWriteBlobRequest(const TString& data, const TUnifiedBlo ExternalStorageOperator->Execute(evPtr); } -void TWriteAction::DoOnExecuteTxAfterWrite(NColumnShard::TColumnShard& /*self*/, NColumnShard::TBlobManagerDb& dbBlobs, const bool blobsWroteSuccessfully) { +void TWriteAction::DoOnExecuteTxAfterWrite(NColumnShard::TColumnShard& /*self*/, TBlobManagerDb& dbBlobs, const bool blobsWroteSuccessfully) { if (blobsWroteSuccessfully) { for (auto&& i : GetBlobsForWrite()) { dbBlobs.RemoveTierDraftBlobId(GetStorageId(), i.first); } - } else { - for (auto&& i : GetBlobsForWrite()) { - dbBlobs.RemoveTierDraftBlobId(GetStorageId(), i.first); - dbBlobs.AddTierBlobToDelete(GetStorageId(), i.first); - } } } -void TWriteAction::DoOnExecuteTxBeforeWrite(NColumnShard::TColumnShard& /*self*/, NColumnShard::TBlobManagerDb& dbBlobs) { +void TWriteAction::DoOnExecuteTxBeforeWrite(NColumnShard::TColumnShard& /*self*/, TBlobManagerDb& dbBlobs) { for (auto&& i : GetBlobsForWrite()) { dbBlobs.AddTierDraftBlobId(GetStorageId(), i.first); } @@ -43,7 +38,7 @@ NKikimr::NOlap::TUnifiedBlobId TWriteAction::AllocateNextBlobId(const TString& d void TWriteAction::DoOnCompleteTxAfterWrite(NColumnShard::TColumnShard& /*self*/, const bool blobsWroteSuccessfully) { if (!blobsWroteSuccessfully) { for (auto&& i : GetBlobsForWrite()) { - GCInfo->MutableBlobsToDelete().emplace_back(i.first); + GCInfo->MutableDraftBlobIdsToRemove().emplace_back(i.first); } } } diff --git a/ydb/core/tx/columnshard/blobs_action/tier/write.h 
b/ydb/core/tx/columnshard/blobs_action/tier/write.h index 425cabc27d51..cd1ebb65206c 100644 --- a/ydb/core/tx/columnshard/blobs_action/tier/write.h +++ b/ydb/core/tx/columnshard/blobs_action/tier/write.h @@ -1,7 +1,6 @@ #pragma once #include -#include #include #include #include "gc_info.h" @@ -21,13 +20,13 @@ class TWriteAction: public IBlobsWritingAction { Y_ABORT_UNLESS(status == NKikimrProto::EReplyStatus::OK); } - virtual void DoOnExecuteTxBeforeWrite(NColumnShard::TColumnShard& self, NColumnShard::TBlobManagerDb& dbBlobs) override; + virtual void DoOnExecuteTxBeforeWrite(NColumnShard::TColumnShard& self, TBlobManagerDb& dbBlobs) override; virtual void DoOnCompleteTxBeforeWrite(NColumnShard::TColumnShard& /*self*/) override { return; } - virtual void DoOnExecuteTxAfterWrite(NColumnShard::TColumnShard& self, NColumnShard::TBlobManagerDb& dbBlobs, const bool blobsWroteSuccessfully) override; - virtual void DoOnCompleteTxAfterWrite(NColumnShard::TColumnShard& /*self*/, const bool blobsWroteSuccessfully) override; + virtual void DoOnExecuteTxAfterWrite(NColumnShard::TColumnShard& self, TBlobManagerDb& dbBlobs, const bool blobsWroteSuccessfully) override; + virtual void DoOnCompleteTxAfterWrite(NColumnShard::TColumnShard& self, const bool blobsWroteSuccessfully) override; public: virtual bool NeedDraftTransaction() const override { return true; diff --git a/ydb/core/tx/columnshard/blobs_action/tier/ya.make b/ydb/core/tx/columnshard/blobs_action/tier/ya.make index 2df622ce24e0..6fd1d1cc0859 100644 --- a/ydb/core/tx/columnshard/blobs_action/tier/ya.make +++ b/ydb/core/tx/columnshard/blobs_action/tier/ya.make @@ -16,6 +16,8 @@ PEERDIR( contrib/libs/apache/arrow ydb/core/tablet_flat ydb/core/tx/tiering + ydb/core/tx/columnshard/data_sharing/protos + ydb/core/tx/columnshard/blobs_action/abstract ) END() diff --git a/ydb/core/tx/columnshard/blobs_action/transaction/tx_draft.cpp b/ydb/core/tx/columnshard/blobs_action/transaction/tx_draft.cpp index 
c041f6cd412a..5d0b9b721f5d 100644 --- a/ydb/core/tx/columnshard/blobs_action/transaction/tx_draft.cpp +++ b/ydb/core/tx/columnshard/blobs_action/transaction/tx_draft.cpp @@ -2,4 +2,22 @@ namespace NKikimr::NColumnShard { +bool TTxWriteDraft::Execute(TTransactionContext& txc, const TActorContext& /*ctx*/) { + TMemoryProfileGuard mpg("TTxWriteDraft::Execute"); + NOlap::TBlobManagerDb blobManagerDb(txc.DB); + for (auto&& action : WriteController->GetBlobActions()) { + action.second->OnExecuteTxBeforeWrite(*Self, blobManagerDb); + } + return true; +} + +void TTxWriteDraft::Complete(const TActorContext& ctx) { + TMemoryProfileGuard mpg("TTxWriteDraft::Complete"); + Completed = true; + for (auto&& action : WriteController->GetBlobActions()) { + action.second->OnCompleteTxBeforeWrite(*Self); + } + ctx.Register(NColumnShard::CreateWriteActor(Self->TabletID(), WriteController, TInstant::Max())); +} + } diff --git a/ydb/core/tx/columnshard/blobs_action/transaction/tx_draft.h b/ydb/core/tx/columnshard/blobs_action/transaction/tx_draft.h index a90db85f0f73..1bd9762d7917 100644 --- a/ydb/core/tx/columnshard/blobs_action/transaction/tx_draft.h +++ b/ydb/core/tx/columnshard/blobs_action/transaction/tx_draft.h @@ -7,26 +7,23 @@ using namespace NTabletFlatExecutor; class TTxWriteDraft: public TTransactionBase { private: + bool Completed = false; const IWriteController::TPtr WriteController; public: + ~TTxWriteDraft() { + if (!Completed) { + WriteController->Abort("TTxWriteDraft aborted before complete"); + } + } + TTxWriteDraft(TColumnShard* self, const IWriteController::TPtr writeController) : TBase(self) , WriteController(writeController) { + AFL_VERIFY(WriteController); } - bool Execute(TTransactionContext& txc, const TActorContext& /*ctx*/) override { - TBlobManagerDb blobManagerDb(txc.DB); - for (auto&& action : WriteController->GetBlobActions()) { - action->OnExecuteTxBeforeWrite(*Self, blobManagerDb); - } - return true; - } - void Complete(const TActorContext& ctx) override 
{ - for (auto&& action : WriteController->GetBlobActions()) { - action->OnCompleteTxBeforeWrite(*Self); - } - ctx.Register(NColumnShard::CreateWriteActor(Self->TabletID(), WriteController, TInstant::Max())); - } + bool Execute(TTransactionContext& txc, const TActorContext& /*ctx*/) override; + void Complete(const TActorContext& ctx) override; TTxType GetTxType() const override { return TXTYPE_WRITE_DRAFT; } }; diff --git a/ydb/core/tx/columnshard/blobs_action/transaction/tx_gc_indexed.cpp b/ydb/core/tx/columnshard/blobs_action/transaction/tx_gc_indexed.cpp index 6c03ae44a96d..73baa553cab9 100644 --- a/ydb/core/tx/columnshard/blobs_action/transaction/tx_gc_indexed.cpp +++ b/ydb/core/tx/columnshard/blobs_action/transaction/tx_gc_indexed.cpp @@ -2,12 +2,14 @@ namespace NKikimr::NColumnShard { bool TTxGarbageCollectionFinished::Execute(TTransactionContext& txc, const TActorContext& /*ctx*/) { + TMemoryProfileGuard mpg("TTxGarbageCollectionFinished::Execute"); AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("tx", "TxGarbageCollectionFinished")("event", "execute"); - TBlobManagerDb blobManagerDb(txc.DB); + NOlap::TBlobManagerDb blobManagerDb(txc.DB); Action->OnExecuteTxAfterCleaning(*Self, blobManagerDb); return true; } void TTxGarbageCollectionFinished::Complete(const TActorContext& /*ctx*/) { + TMemoryProfileGuard mpg("TTxGarbageCollectionFinished::Complete"); AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("tx", "TxGarbageCollectionFinished")("event", "complete"); Action->OnCompleteTxAfterCleaning(*Self, Action); } diff --git a/ydb/core/tx/columnshard/blobs_action/transaction/tx_gc_insert_table.cpp b/ydb/core/tx/columnshard/blobs_action/transaction/tx_gc_insert_table.cpp index 307db45a5d24..ece65128719a 100644 --- a/ydb/core/tx/columnshard/blobs_action/transaction/tx_gc_insert_table.cpp +++ b/ydb/core/tx/columnshard/blobs_action/transaction/tx_gc_insert_table.cpp @@ -1,27 +1,35 @@ #include "tx_gc_insert_table.h" +#include namespace NKikimr::NColumnShard { bool 
TTxInsertTableCleanup::Execute(TTransactionContext& txc, const TActorContext& /*ctx*/) { + TMemoryProfileGuard mpg("TTxInsertTableCleanup::Execute"); TBlobGroupSelector dsGroupSelector(Self->Info()); NOlap::TDbWrapper dbTable(txc.DB, &dsGroupSelector); NIceDb::TNiceDb db(txc.DB); Self->TryAbortWrites(db, dbTable, std::move(WriteIdsToAbort)); - TBlobManagerDb blobManagerDb(txc.DB); + NOlap::TBlobManagerDb blobManagerDb(txc.DB); auto allAborted = Self->InsertTable->GetAborted(); auto storage = Self->StoragesManager->GetInsertOperator(); - BlobsAction = storage->StartDeclareRemovingAction("TX_CLEANUP"); + BlobsAction = storage->StartDeclareRemovingAction(NOlap::NBlobOperations::EConsumer::CLEANUP_INSERT_TABLE); for (auto& [abortedWriteId, abortedData] : allAborted) { - Self->InsertTable->EraseAborted(dbTable, abortedData, BlobsAction); + Self->InsertTable->EraseAbortedOnExecute(dbTable, abortedData, BlobsAction); } - BlobsAction->OnExecuteTxAfterRemoving(*Self, blobManagerDb, true); + BlobsAction->OnExecuteTxAfterRemoving(blobManagerDb, true); return true; } void TTxInsertTableCleanup::Complete(const TActorContext& /*ctx*/) { + TMemoryProfileGuard mpg("TTxInsertTableCleanup::Complete"); + auto allAborted = Self->InsertTable->GetAborted(); + for (auto& [abortedWriteId, abortedData] : allAborted) { + Self->InsertTable->EraseAbortedOnComplete(abortedData); + } + Y_ABORT_UNLESS(BlobsAction); - BlobsAction->OnCompleteTxAfterRemoving(*Self, true); + BlobsAction->OnCompleteTxAfterRemoving(true); Self->EnqueueBackgroundActivities(); } diff --git a/ydb/core/tx/columnshard/blobs_action/transaction/tx_remove_blobs.cpp b/ydb/core/tx/columnshard/blobs_action/transaction/tx_remove_blobs.cpp new file mode 100644 index 000000000000..74ad4c82b7e8 --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/transaction/tx_remove_blobs.cpp @@ -0,0 +1,22 @@ +#include "tx_remove_blobs.h" +#include + +namespace NKikimr::NColumnShard { + +bool TTxRemoveSharedBlobs::Execute(TTransactionContext& 
txc, const TActorContext&) { + TMemoryProfileGuard mpg("TTxRemoveSharedBlobs::Execute"); + NActors::TLogContextGuard logGuard = NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("tablet_id", Self->TabletID())("tx_state", "execute"); + NOlap::TBlobManagerDb blobManagerDb(txc.DB); + RemoveAction->OnExecuteTxAfterRemoving(blobManagerDb, true); + return true; +} + +void TTxRemoveSharedBlobs::Complete(const TActorContext& ctx) { + TMemoryProfileGuard mpg("TTxRemoveSharedBlobs::Complete"); + NActors::TLogContextGuard logGuard = NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("tablet_id", Self->TabletID())("tx_state", "complete"); + RemoveAction->OnCompleteTxAfterRemoving(true); + + ctx.Send(InitiatorActorId, new NOlap::NBlobOperations::NEvents::TEvDeleteSharedBlobsFinished((NOlap::TTabletId)Self->TabletID())); +} + +} diff --git a/ydb/core/tx/columnshard/blobs_action/transaction/tx_remove_blobs.h b/ydb/core/tx/columnshard/blobs_action/transaction/tx_remove_blobs.h new file mode 100644 index 000000000000..d716ba89860f --- /dev/null +++ b/ydb/core/tx/columnshard/blobs_action/transaction/tx_remove_blobs.h @@ -0,0 +1,34 @@ +#pragma once +#include +#include + +namespace NKikimr::NColumnShard { + +class TTxRemoveSharedBlobs: public TTransactionBase { +private: + std::shared_ptr RemoveAction; + const ui32 TabletTxNo; + const NActors::TActorId InitiatorActorId; + + TStringBuilder TxPrefix() const { + return TStringBuilder() << "TxWrite[" << ToString(TabletTxNo) << "] "; + } + + TString TxSuffix() const { + return TStringBuilder() << " at tablet " << Self->TabletID(); + } +public: + TTxRemoveSharedBlobs(TColumnShard* self, const std::shared_ptr& removeAction, const NActors::TActorId initiatorActorId) + : TBase(self) + , RemoveAction(removeAction) + , TabletTxNo(++Self->TabletTxCounter) + , InitiatorActorId(initiatorActorId) + {} + + bool Execute(TTransactionContext& txc, const TActorContext& ctx) override; + void Complete(const TActorContext& 
ctx) override; + TTxType GetTxType() const override { return TXTYPE_DELETE_SHARED_BLOBS; } +}; + + +} diff --git a/ydb/core/tx/columnshard/blobs_action/transaction/tx_write.cpp b/ydb/core/tx/columnshard/blobs_action/transaction/tx_write.cpp index 1701f5ae6a5a..9b80f0033865 100644 --- a/ydb/core/tx/columnshard/blobs_action/transaction/tx_write.cpp +++ b/ydb/core/tx/columnshard/blobs_action/transaction/tx_write.cpp @@ -1,6 +1,7 @@ #include "tx_write.h" namespace NKikimr::NColumnShard { + bool TTxWrite::InsertOneBlob(TTransactionContext& txc, const NOlap::TWideSerializedBatch& batch, const TWriteId writeId) { NKikimrTxColumnShard::TLogicalMetadata meta; meta.SetNumRows(batch->GetRowsCount()); @@ -28,8 +29,8 @@ bool TTxWrite::InsertOneBlob(TTransactionContext& txc, const NOlap::TWideSeriali return false; } - bool TTxWrite::Execute(TTransactionContext& txc, const TActorContext&) { + TMemoryProfileGuard mpg("TTxWrite::Execute"); NActors::TLogContextGuard logGuard = NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("tablet_id", Self->TabletID())("tx_state", "execute"); ACFL_DEBUG("event", "start_execute"); const NOlap::TWritingBuffer& buffer = PutBlobResult->Get()->MutableWritesBuffer(); @@ -66,29 +67,30 @@ bool TTxWrite::Execute(TTransactionContext& txc, const TActorContext&) { } } - TBlobManagerDb blobManagerDb(txc.DB); + NOlap::TBlobManagerDb blobManagerDb(txc.DB); AFL_VERIFY(buffer.GetAddActions().size() == 1); for (auto&& i : buffer.GetAddActions()) { i->OnExecuteTxAfterWrite(*Self, blobManagerDb, true); } for (auto&& i : buffer.GetRemoveActions()) { - i->OnExecuteTxAfterRemoving(*Self, blobManagerDb, true); + i->OnExecuteTxAfterRemoving(blobManagerDb, true); } for (auto&& aggr : buffer.GetAggregations()) { const auto& writeMeta = aggr->GetWriteData()->GetWriteMeta(); - std::unique_ptr result; - TWriteOperation::TPtr operation; if (!writeMeta.HasLongTxId()) { - operation = Self->OperationsManager->GetOperation((TWriteId)writeMeta.GetWriteId()); + 
auto operation = Self->OperationsManager->GetOperation((TWriteId)writeMeta.GetWriteId()); Y_ABORT_UNLESS(operation); Y_ABORT_UNLESS(operation->GetStatus() == EOperationStatus::Started); - } - if (operation) { operation->OnWriteFinish(txc, aggr->GetWriteIds()); - auto txInfo = Self->ProgressTxController->RegisterTxWithDeadline(operation->GetTxId(), NKikimrTxColumnShard::TX_KIND_COMMIT_WRITE, "", writeMeta.GetSource(), 0, txc); - Y_UNUSED(txInfo); - NEvents::TDataEvents::TCoordinatorInfo tInfo = Self->ProgressTxController->GetCoordinatorInfo(operation->GetTxId()); - Results.emplace_back(NEvents::TDataEvents::TEvWriteResult::BuildPrepared(Self->TabletID(), operation->GetTxId(), tInfo)); + if (operation->GetBehaviour() == EOperationBehaviour::InTxWrite) { + NKikimrTxColumnShard::TCommitWriteTxBody proto; + proto.SetLockId(operation->GetLockId()); + TString txBody; + Y_ABORT_UNLESS(proto.SerializeToString(&txBody)); + ProposeTransaction(TTxController::TBasicTxInfo(NKikimrTxColumnShard::TX_KIND_COMMIT_WRITE, operation->GetLockId()), txBody, writeMeta.GetSource(), operation->GetCookie(), txc); + } else { + Results.emplace_back(NEvents::TDataEvents::TEvWriteResult::BuildCompleted(Self->TabletID(), operation->GetLockId())); + } } else { Y_ABORT_UNLESS(aggr->GetWriteIds().size() == 1); Results.emplace_back(std::make_unique(Self->TabletID(), writeMeta, (ui64)aggr->GetWriteIds().front(), NKikimrTxColumnShard::EResultStatus::SUCCESS)); @@ -97,7 +99,17 @@ bool TTxWrite::Execute(TTransactionContext& txc, const TActorContext&) { return true; } +void TTxWrite::OnProposeResult(TTxController::TProposeResult& proposeResult, const TTxController::TTxInfo& txInfo) { + Y_UNUSED(proposeResult); + Results.emplace_back(NEvents::TDataEvents::TEvWriteResult::BuildPrepared(Self->TabletID(), txInfo.TxId, Self->GetProgressTxController().BuildCoordinatorInfo(txInfo))); +} + +void TTxWrite::OnProposeError(TTxController::TProposeResult& proposeResult, const TTxController::TBasicTxInfo& txInfo) { + 
AFL_VERIFY(false)("problem", "Unexpected behaviour")("tx_id", txInfo.TxId)("details", proposeResult.DebugString()); /* condition must be an explicit false: a string literal is always truthy, so the verify could never fire */ +} + void TTxWrite::Complete(const TActorContext& ctx) { + TMemoryProfileGuard mpg("TTxWrite::Complete"); NActors::TLogContextGuard logGuard = NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("tablet_id", Self->TabletID())("tx_state", "complete"); const auto now = TMonotonic::Now(); const NOlap::TWritingBuffer& buffer = PutBlobResult->Get()->MutableWritesBuffer(); @@ -105,12 +117,17 @@ void TTxWrite::Complete(const TActorContext& ctx) { i->OnCompleteTxAfterWrite(*Self, true); } for (auto&& i : buffer.GetRemoveActions()) { - i->OnCompleteTxAfterRemoving(*Self, true); + i->OnCompleteTxAfterRemoving(true); } AFL_VERIFY(buffer.GetAggregations().size() == Results.size()); for (ui32 i = 0; i < buffer.GetAggregations().size(); ++i) { const auto& writeMeta = buffer.GetAggregations()[i]->GetWriteData()->GetWriteMeta(); - ctx.Send(writeMeta.GetSource(), Results[i].release()); + auto operation = Self->OperationsManager->GetOperation((TWriteId)writeMeta.GetWriteId()); + if (operation) { + ctx.Send(writeMeta.GetSource(), Results[i].release(), 0, operation->GetCookie()); + } else { + ctx.Send(writeMeta.GetSource(), Results[i].release()); + } Self->CSCounters.OnWriteTxComplete(now - writeMeta.GetWriteStartInstant()); Self->CSCounters.OnSuccessWriteResponse(); } diff --git a/ydb/core/tx/columnshard/blobs_action/transaction/tx_write.h b/ydb/core/tx/columnshard/blobs_action/transaction/tx_write.h index 6086542940f6..4af16dc94cee 100644 --- a/ydb/core/tx/columnshard/blobs_action/transaction/tx_write.h +++ b/ydb/core/tx/columnshard/blobs_action/transaction/tx_write.h @@ -1,13 +1,14 @@ #pragma once #include +#include #include namespace NKikimr::NColumnShard { -class TTxWrite : public TTransactionBase { +class TTxWrite : public TProposeTransactionBase { public: TTxWrite(TColumnShard* self, const TEvPrivate::TEvWriteBlobsResult::TPtr& putBlobResult) - : TBase(self) + : 
TProposeTransactionBase(self) , PutBlobResult(putBlobResult) , TabletTxNo(++Self->TabletTxCounter) {} @@ -16,13 +17,17 @@ class TTxWrite : public TTransactionBase { void Complete(const TActorContext& ctx) override; TTxType GetTxType() const override { return TXTYPE_WRITE; } - bool InsertOneBlob(TTransactionContext& txc, const NOlap::TWideSerializedBatch& batch, const TWriteId writeId); private: TEvPrivate::TEvWriteBlobsResult::TPtr PutBlobResult; const ui32 TabletTxNo; std::vector> Results; + + bool InsertOneBlob(TTransactionContext& txc, const NOlap::TWideSerializedBatch& batch, const TWriteId writeId); + void OnProposeResult(TTxController::TProposeResult& proposeResult, const TTxController::TTxInfo& txInfo) override; + void OnProposeError(TTxController::TProposeResult& proposeResult, const TTxController::TBasicTxInfo& txInfo) override; + TStringBuilder TxPrefix() const { return TStringBuilder() << "TxWrite[" << ToString(TabletTxNo) << "] "; } diff --git a/ydb/core/tx/columnshard/blobs_action/transaction/tx_write_index.cpp b/ydb/core/tx/columnshard/blobs_action/transaction/tx_write_index.cpp index 429696ac9951..e6b9808493d2 100644 --- a/ydb/core/tx/columnshard/blobs_action/transaction/tx_write_index.cpp +++ b/ydb/core/tx/columnshard/blobs_action/transaction/tx_write_index.cpp @@ -1,41 +1,46 @@ #include "tx_write_index.h" +#include #include +#include #include namespace NKikimr::NColumnShard { bool TTxWriteIndex::Execute(TTransactionContext& txc, const TActorContext& ctx) { auto changes = Ev->Get()->IndexChanges; + TMemoryProfileGuard mpg("TTxWriteIndex::Execute::" + changes->TypeString()); TLogContextGuard gLogging = NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("tablet_id", Self->TabletID())("external_task_id", changes->GetTaskIdentifier()); Y_ABORT_UNLESS(Self->InsertTable); Y_ABORT_UNLESS(Self->TablesManager.HasPrimaryIndex()); txc.DB.NoMoreReadsForTx(); - ACFL_DEBUG("event", "TTxWriteIndex::Execute")("change_type", 
changes->TypeString())("details", *changes); + ACFL_DEBUG("event", "TTxWriteIndex::Execute")("change_type", changes->TypeString())("details", changes->DebugString()); if (Ev->Get()->GetPutStatus() == NKikimrProto::OK) { NOlap::TSnapshot snapshot(Self->LastPlannedStep, Self->LastPlannedTxId); - Y_ABORT_UNLESS(Ev->Get()->IndexInfo.GetLastSchema()->GetSnapshot() <= snapshot); + Y_ABORT_UNLESS(Ev->Get()->IndexInfo->GetLastSchema()->GetSnapshot() <= snapshot); TBlobGroupSelector dsGroupSelector(Self->Info()); NOlap::TDbWrapper dbWrap(txc.DB, &dsGroupSelector); AFL_VERIFY(Self->TablesManager.MutablePrimaryIndex().ApplyChanges(dbWrap, changes, snapshot)); LOG_S_DEBUG(TxPrefix() << "(" << changes->TypeString() << ") apply" << TxSuffix()); - NOlap::TWriteIndexContext context(txc, dbWrap); - changes->WriteIndex(*Self, context); + NOlap::TWriteIndexContext context(&txc.DB, dbWrap, Self->MutableIndexAs()); + changes->WriteIndexOnExecute(Self, context); - changes->MutableBlobsAction().OnExecuteTxAfterAction(*Self, *context.BlobManagerDb, true); + NOlap::TBlobManagerDb blobManagerDb(txc.DB); + changes->MutableBlobsAction().OnExecuteTxAfterAction(*Self, blobManagerDb, true); Self->UpdateIndexCounters(); } else { TBlobGroupSelector dsGroupSelector(Self->Info()); - NColumnShard::TBlobManagerDb blobsDb(txc.DB); + NOlap::TBlobManagerDb blobsDb(txc.DB); changes->MutableBlobsAction().OnExecuteTxAfterAction(*Self, blobsDb, false); for (ui32 i = 0; i < changes->GetWritePortionsCount(); ++i) { - for (auto&& i : changes->GetWritePortionInfo(i)->GetPortionInfo().Records) { - LOG_S_WARN(TxPrefix() << "(" << changes->TypeString() << ":" << i.BlobRange << ") blob cannot apply changes: " << TxSuffix()); + auto& portion = changes->GetWritePortionInfo(i)->GetPortionInfo(); + for (auto&& i : portion.Records) { + LOG_S_WARN(TxPrefix() << "(" << changes->TypeString() << ":" << portion.RestoreBlobRange(i.BlobRange) << ") blob cannot apply changes: " << TxSuffix()); } } - NOlap::TChangesFinishContext 
context("cannot write index blobs"); + NOlap::TChangesFinishContext context("cannot write index blobs: " + ::ToString(Ev->Get()->GetPutStatus())); changes->Abort(*Self, context); LOG_S_ERROR(TxPrefix() << " (" << changes->TypeString() << ") cannot write index blobs" << TxSuffix()); } @@ -48,31 +53,27 @@ void TTxWriteIndex::Complete(const TActorContext& ctx) { TLogContextGuard gLogging(NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("tablet_id", Self->TabletID())); CompleteReady = true; auto changes = Ev->Get()->IndexChanges; - ACFL_DEBUG("event", "TTxWriteIndex::Complete")("change_type", changes->TypeString())("details", *changes); + TMemoryProfileGuard mpg("TTxWriteIndex::Complete::" + changes->TypeString()); + ACFL_DEBUG("event", "TTxWriteIndex::Complete")("change_type", changes->TypeString())("details", changes->DebugString()); const ui64 blobsWritten = changes->GetBlobsAction().GetWritingBlobsCount(); const ui64 bytesWritten = changes->GetBlobsAction().GetWritingTotalSize(); if (!Ev->Get()->IndexChanges->IsAborted()) { - NOlap::TWriteIndexCompleteContext context(ctx, blobsWritten, bytesWritten, Ev->Get()->Duration, TriggerActivity); - Ev->Get()->IndexChanges->WriteIndexComplete(*Self, context); - } - - if (Ev->Get()->GetPutStatus() == NKikimrProto::TRYLATER) { - ctx.Schedule(Self->FailActivationDelay, new TEvPrivate::TEvPeriodicWakeup(true)); - } else { - Self->EnqueueBackgroundActivities(false, TriggerActivity); + NOlap::TWriteIndexCompleteContext context(ctx, blobsWritten, bytesWritten, Ev->Get()->Duration, Self->MutableIndexAs()); + Ev->Get()->IndexChanges->WriteIndexOnComplete(Self, context); } + Self->EnqueueBackgroundActivities(false); changes->MutableBlobsAction().OnCompleteTxAfterAction(*Self, Ev->Get()->GetPutStatus() == NKikimrProto::OK); - NYDBTest::TControllers::GetColumnShardController()->OnWriteIndexComplete(Self->TabletID(), changes->TypeString()); + 
NYDBTest::TControllers::GetColumnShardController()->OnWriteIndexComplete(*changes, *Self); } TTxWriteIndex::~TTxWriteIndex() { if (Ev) { auto changes = Ev->Get()->IndexChanges; if (!CompleteReady && changes) { - changes->AbortEmergency(); + changes->AbortEmergency("TTxWriteIndex destructor withno CompleteReady flag"); } } } diff --git a/ydb/core/tx/columnshard/blobs_action/transaction/tx_write_index.h b/ydb/core/tx/columnshard/blobs_action/transaction/tx_write_index.h index 5c20461c402a..3cf5d29a7219 100644 --- a/ydb/core/tx/columnshard/blobs_action/transaction/tx_write_index.h +++ b/ydb/core/tx/columnshard/blobs_action/transaction/tx_write_index.h @@ -22,7 +22,6 @@ class TTxWriteIndex: public TTransactionBase { TEvPrivate::TEvWriteIndex::TPtr Ev; const ui32 TabletTxNo; - TBackgroundActivity TriggerActivity = TBackgroundActivity::All(); bool CompleteReady = false; TStringBuilder TxPrefix() const { diff --git a/ydb/core/tx/columnshard/blobs_action/transaction/ya.make b/ydb/core/tx/columnshard/blobs_action/transaction/ya.make index 27268e5fd7f1..c78e93ef3b7e 100644 --- a/ydb/core/tx/columnshard/blobs_action/transaction/ya.make +++ b/ydb/core/tx/columnshard/blobs_action/transaction/ya.make @@ -6,6 +6,7 @@ SRCS( tx_write_index.cpp tx_gc_insert_table.cpp tx_gc_indexed.cpp + tx_remove_blobs.cpp ) PEERDIR( @@ -13,6 +14,8 @@ PEERDIR( contrib/libs/apache/arrow ydb/core/tablet_flat ydb/core/tx/tiering + ydb/core/tx/columnshard/data_sharing/protos + ydb/core/tx/columnshard/blobs_action/events ) END() diff --git a/ydb/core/tx/columnshard/blobs_action/ya.make b/ydb/core/tx/columnshard/blobs_action/ya.make index 320cb63e4e79..12470a0aa080 100644 --- a/ydb/core/tx/columnshard/blobs_action/ya.make +++ b/ydb/core/tx/columnshard/blobs_action/ya.make @@ -2,7 +2,6 @@ LIBRARY() SRCS( blob_manager_db.cpp - memory.cpp ) PEERDIR( @@ -11,19 +10,10 @@ PEERDIR( ydb/core/tablet_flat ydb/core/tx/tiering ydb/core/tx/columnshard/blobs_action/abstract - ydb/core/tx/columnshard/blobs_action/bs 
ydb/core/tx/columnshard/blobs_action/counters ydb/core/tx/columnshard/blobs_action/transaction + ydb/core/tx/columnshard/blobs_action/events + ydb/core/tx/columnshard/blobs_action/protos ) -IF (OS_WINDOWS) - CFLAGS( - -DKIKIMR_DISABLE_S3_OPS - ) -ELSE() - PEERDIR( - ydb/core/tx/columnshard/blobs_action/tier - ) -ENDIF() - END() diff --git a/ydb/core/tx/columnshard/blobs_reader/actor.cpp b/ydb/core/tx/columnshard/blobs_reader/actor.cpp index c934ddef6d1a..3d27b9d8a0e8 100644 --- a/ydb/core/tx/columnshard/blobs_reader/actor.cpp +++ b/ydb/core/tx/columnshard/blobs_reader/actor.cpp @@ -14,13 +14,13 @@ void TActor::Handle(NBlobCache::TEvBlobCache::TEvReadBlobRangeResult::TPtr& ev) bool aborted = false; if (event.Status != NKikimrProto::EReplyStatus::OK) { - WaitingBlobsCount.Sub(Task->GetWaitingCount()); - if (!Task->AddError(event.BlobRange, IBlobsReadingAction::TErrorStatus::Fail(event.Status, "cannot get blob: " + event.Data.substr(0, 1024)))) { + WaitingBlobsCount.Sub(Task->GetWaitingRangesCount()); + if (!Task->AddError(event.DataSourceId, event.BlobRange, IBlobsReadingAction::TErrorStatus::Fail(event.Status, "cannot get blob: " + event.Data.substr(0, 1024)))) { aborted = true; } } else { WaitingBlobsCount.Dec(); - Task->AddData(event.BlobRange, event.Data); + Task->AddData(event.DataSourceId, event.BlobRange, event.Data); } if (aborted || Task->IsFinished()) { Task = nullptr; @@ -46,8 +46,7 @@ void TActor::Bootstrap() { NActors::TLogContextGuard gLogging = NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("external_task_id", externalTaskId); Task->StartBlobsFetching({}); ACFL_DEBUG("task", Task->DebugString()); - WaitingBlobsCount.Add(Task->GetReadRangesCount()); - AFL_VERIFY(Task->GetAllRangesSize()); + WaitingBlobsCount.Add(Task->GetWaitingRangesCount()); Become(&TThis::StateWait); if (Task->IsFinished()) { PassAway(); diff --git a/ydb/core/tx/columnshard/blobs_reader/read_coordinator.cpp 
b/ydb/core/tx/columnshard/blobs_reader/read_coordinator.cpp index 5b6a255faffb..df9a740d230c 100644 --- a/ydb/core/tx/columnshard/blobs_reader/read_coordinator.cpp +++ b/ydb/core/tx/columnshard/blobs_reader/read_coordinator.cpp @@ -1,63 +1,41 @@ -#include "read_coordinator.h" - -namespace NKikimr::NOlap::NBlobOperations::NRead { - -TAtomicCounter TReadCoordinatorActor::WaitingBlobsCount = 0; - -void TReadCoordinatorActor::Handle(TEvStartReadTask::TPtr& ev) { - const auto& externalTaskId = ev->Get()->GetTask()->GetExternalTaskId(); - NActors::TLogContextGuard gLogging = NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("external_task_id", externalTaskId); - THashSet rangesInProgress; - for (auto&& agent : ev->Get()->GetTask()->GetAgents()) { - for (auto&& b : agent->GetRangesForRead()) { - for (auto&& r : b.second) { - auto it = BlobTasks.find(r); - if (it != BlobTasks.end()) { - ACFL_DEBUG("event", "TEvReadTask")("enqueued_blob_id", r); - rangesInProgress.emplace(r); - } else { - ACFL_DEBUG("event", "TEvReadTask")("blob_id", r); - it = BlobTasks.emplace(r, std::vector>()).first; - WaitingBlobsCount.Inc(); - } - it->second.emplace_back(ev->Get()->GetTask()); - } - } - } - ev->Get()->GetTask()->StartBlobsFetching(rangesInProgress); - ACFL_DEBUG("task", ev->Get()->GetTask()->DebugString()); - AFL_VERIFY(ev->Get()->GetTask()->GetAllRangesSize()); -} - -void TReadCoordinatorActor::Handle(NBlobCache::TEvBlobCache::TEvReadBlobRangeResult::TPtr& ev) { - ACFL_TRACE("event", "TEvReadBlobRangeResult")("blob_id", ev->Get()->BlobRange); - - auto& event = *ev->Get(); - auto it = BlobTasks.find(event.BlobRange); - AFL_VERIFY(it != BlobTasks.end())("blob_id", event.BlobRange); - for (auto&& i : it->second) { - if (event.Status != NKikimrProto::EReplyStatus::OK) { - i->AddError(event.BlobRange, IBlobsReadingAction::TErrorStatus::Fail(event.Status, "cannot get blob")); - } else { - i->AddData(event.BlobRange, event.Data); - } - } - WaitingBlobsCount.Dec(); - 
BlobTasks.erase(it); -} - -TReadCoordinatorActor::TReadCoordinatorActor(ui64 tabletId, const TActorId& parent) - : TabletId(tabletId) - , Parent(parent) { - -} - -TReadCoordinatorActor::~TReadCoordinatorActor() { - for (auto&& i : BlobTasks) { - for (auto&& t : i.second) { - t->Abort(); - } - } -} - +#include "read_coordinator.h" + +namespace NKikimr::NOlap::NBlobOperations::NRead { + +void TReadCoordinatorActor::Handle(TEvStartReadTask::TPtr& ev) { + const auto& externalTaskId = ev->Get()->GetTask()->GetExternalTaskId(); + NActors::TLogContextGuard gLogging = NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("external_task_id", externalTaskId); + THashSet rangesInProgress; + BlobTasks.AddTask(ev->Get()->GetTask()); + ev->Get()->GetTask()->StartBlobsFetching(rangesInProgress); + ACFL_DEBUG("task", ev->Get()->GetTask()->DebugString()); +} + +void TReadCoordinatorActor::Handle(NBlobCache::TEvBlobCache::TEvReadBlobRangeResult::TPtr& ev) { + ACFL_TRACE("event", "TEvReadBlobRangeResult")("blob_id", ev->Get()->BlobRange); + + auto& event = *ev->Get(); + auto tasks = BlobTasks.Extract(event.DataSourceId, event.BlobRange); + for (auto&& i : tasks) { + if (event.Status != NKikimrProto::EReplyStatus::OK) { + i->AddError(event.DataSourceId, event.BlobRange, IBlobsReadingAction::TErrorStatus::Fail(event.Status, "cannot get blob")); + } else { + i->AddData(event.DataSourceId, event.BlobRange, event.Data); + } + } +} + +TReadCoordinatorActor::TReadCoordinatorActor(ui64 tabletId, const TActorId& parent) + : TabletId(tabletId) + , Parent(parent) { + +} + +TReadCoordinatorActor::~TReadCoordinatorActor() { + auto tasks = BlobTasks.ExtractTasksAll(); + for (auto&& i : tasks) { + i->Abort(); + } +} + } diff --git a/ydb/core/tx/columnshard/blobs_reader/read_coordinator.h b/ydb/core/tx/columnshard/blobs_reader/read_coordinator.h index e89a29aee1b0..abd93b724a6d 100644 --- a/ydb/core/tx/columnshard/blobs_reader/read_coordinator.h +++ 
b/ydb/core/tx/columnshard/blobs_reader/read_coordinator.h @@ -12,13 +12,55 @@ namespace NKikimr::NOlap::NBlobOperations::NRead { +class TBlobsForRead { +private: + THashMap>>> BlobTasks; +public: + std::vector> ExtractTasksAll() { + THashMap> tasks; + for (auto&& i : BlobTasks) { + for (auto&& r : i.second) { + for (auto&& t : r.second) { + tasks.emplace(t->GetTaskIdentifier(), t); + } + } + } + std::vector> result; + for (auto&& i : tasks) { + result.emplace_back(i.second); + } + return result; + } + + std::vector> Extract(const TString& storageId, const TBlobRange& bRange) { + auto it = BlobTasks.find(storageId); + AFL_VERIFY(it != BlobTasks.end()); + auto itBlobRange = it->second.find(bRange); + AFL_VERIFY(itBlobRange != it->second.end())("storage_id", storageId)("blob_range", bRange.ToString()); /* an unregistered range would otherwise dereference end() below */ + auto result = std::move(itBlobRange->second); + it->second.erase(itBlobRange); + if (it->second.empty()) { + BlobTasks.erase(it); + } + return result; + } + + void AddTask(const std::shared_ptr& task) { + for (auto&& i : task->GetAgents()) { + auto& storage = BlobTasks[i.second->GetStorageId()]; + for (auto&& bRid : i.second->GetGroups()) { + storage[bRid.first].emplace_back(task); + } + } + } +}; + class TReadCoordinatorActor: public NActors::TActorBootstrapped { private: ui64 TabletId; NActors::TActorId Parent; - THashMap>> BlobTasks; + TBlobsForRead BlobTasks; public: - static TAtomicCounter WaitingBlobsCount; TReadCoordinatorActor(ui64 tabletId, const TActorId& parent); void Handle(TEvStartReadTask::TPtr& ev); @@ -42,4 +84,4 @@ class TReadCoordinatorActor: public NActors::TActorBootstrapped>& ITask::GetAgents() const { - Y_ABORT_UNLESS(!BlobsFetchingStarted); - return Agents; -} - -bool ITask::AddError(const TBlobRange& range, const IBlobsReadingAction::TErrorStatus& status) { - ++BlobErrorsCount; +bool ITask::AddError(const TString& storageIdExt, const TBlobRange& range, const IBlobsReadingAction::TErrorStatus& status) { + const TString storageId = storageIdExt ? 
storageIdExt : IStoragesManager::DefaultStorageId; + AFL_VERIFY(--BlobsWaitingCount >= 0); if (TaskFinishedWithError || AbortFlag) { - ACFL_WARN("event", "SkipError")("blob_range", range)("message", status.GetErrorMessage())("status", status.GetStatus())("external_task_id", ExternalTaskId)("consumer", TaskCustomer) + ACFL_WARN("event", "SkipError")("storage_id", storageId)("blob_range", range)("message", status.GetErrorMessage())("status", status.GetStatus())("external_task_id", ExternalTaskId)("consumer", TaskCustomer) ("abort", AbortFlag)("finished_with_error", TaskFinishedWithError); return false; } else { - ACFL_ERROR("event", "NewError")("blob_range", range)("message", status.GetErrorMessage())("status", status.GetStatus())("external_task_id", ExternalTaskId)("consumer", TaskCustomer); + ACFL_ERROR("event", "NewError")("storage_id", storageId)("blob_range", range)("message", status.GetErrorMessage())("status", status.GetStatus())("external_task_id", ExternalTaskId)("consumer", TaskCustomer); } { - auto it = BlobsWaiting.find(range); - AFL_VERIFY(it != BlobsWaiting.end()); + auto it = AgentsWaiting.find(storageId); + AFL_VERIFY(it != AgentsWaiting.end())("storage_id", storageId); it->second->OnReadError(range, status); - BlobsWaiting.erase(it); + if (it->second->IsFinished()) { + AgentsWaiting.erase(it); + } } - if (!OnError(range, status)) { + if (!OnError(storageId, range, status)) { TaskFinishedWithError = true; return false; } - if (BlobsWaiting.empty()) { + if (AgentsWaiting.empty()) { OnDataReady(); } return true; } -void ITask::AddData(const TBlobRange& range, const TString& data) { - ++BlobsDataCount; +void ITask::AddData(const TString& storageIdExt, const TBlobRange& range, const TString& data) { + const TString storageId = storageIdExt ? 
storageIdExt : IStoragesManager::DefaultStorageId; + AFL_VERIFY(--BlobsWaitingCount >= 0); if (TaskFinishedWithError || AbortFlag) { - ACFL_WARN("event", "SkipDataAfterError")("external_task_id", ExternalTaskId)("abort", AbortFlag)("finished_with_error", TaskFinishedWithError); + ACFL_WARN("event", "SkipDataAfterError")("storage_id", storageId)("external_task_id", ExternalTaskId)("abort", AbortFlag)("finished_with_error", TaskFinishedWithError); return; } else { - ACFL_TRACE("event", "NewData")("range", range.ToString())("external_task_id", ExternalTaskId); + ACFL_TRACE("event", "NewData")("storage_id", storageId)("range", range.ToString())("external_task_id", ExternalTaskId); } Y_ABORT_UNLESS(BlobsFetchingStarted); { - auto it = BlobsWaiting.find(range); - AFL_VERIFY(it != BlobsWaiting.end()); + auto it = AgentsWaiting.find(storageId); + AFL_VERIFY(it != AgentsWaiting.end())("storage_id", storageId); it->second->OnReadResult(range, data); - BlobsWaiting.erase(it); + if (it->second->IsFinished()) { + AgentsWaiting.erase(it); + } } - if (BlobsWaiting.empty()) { + if (AgentsWaiting.empty()) { OnDataReady(); } } @@ -59,24 +60,15 @@ void ITask::StartBlobsFetching(const THashSet& rangesInProgress) { ACFL_TRACE("task_id", ExternalTaskId)("event", "start"); Y_ABORT_UNLESS(!BlobsFetchingStarted); BlobsFetchingStarted = true; - ui64 allRangesSize = 0; - ui64 allRangesCount = 0; - ui64 readRangesCount = 0; + AFL_VERIFY(BlobsWaitingCount == 0); for (auto&& agent : Agents) { - allRangesSize += agent->GetExpectedBlobsSize(); - allRangesCount += agent->GetExpectedBlobsCount(); - for (auto&& b : agent->GetRangesForRead()) { - for (auto&& r : b.second) { - BlobsWaiting.emplace(r, agent); - ++readRangesCount; - } + agent.second->Start(rangesInProgress); + if (!agent.second->IsFinished()) { + AgentsWaiting.emplace(agent.second->GetStorageId(), agent.second); + BlobsWaitingCount += agent.second->GetGroups().size(); } - agent->Start(rangesInProgress); } - ReadRangesCount = 
readRangesCount; - AllRangesCount = allRangesCount; - AllRangesSize = allRangesSize; - if (BlobsWaiting.empty()) { + if (AgentsWaiting.empty()) { OnDataReady(); } } @@ -91,15 +83,15 @@ void ITask::TReadSubscriber::DoOnAllocationSuccess(const std::shared_ptr>& actions, const TString& taskCustomer, const TString& externalTaskId) - : Agents(actions) - , TaskIdentifier(TaskIdentifierBuilder.Inc()) +ITask::ITask(const TReadActionsCollection& actions, const TString& taskCustomer, const TString& externalTaskId) + : TaskIdentifier(TaskIdentifierBuilder.Inc()) , ExternalTaskId(externalTaskId) , TaskCustomer(taskCustomer) { - AFL_VERIFY(Agents.size()); + Agents = actions; + AFL_VERIFY(!Agents.IsEmpty()); for (auto&& i : Agents) { - AFL_VERIFY(i->GetExpectedBlobsCount()); + AFL_VERIFY(i.second->GetExpectedBlobsCount()); } } @@ -108,11 +100,7 @@ TString ITask::DebugString() const { if (TaskFinishedWithError) { sb << "finished_with_error=" << TaskFinishedWithError << ";"; } - if (BlobErrorsCount) { - sb << "blob_errors=" << BlobErrorsCount << ";"; - } - sb << "data=" << BlobsDataCount << ";" - << "waiting=" << BlobsWaiting.size() << ";" + sb << "agents_waiting=" << AgentsWaiting.size() << ";" << "additional_info=(" << DoDebugString() << ");" ; return sb; @@ -125,22 +113,22 @@ void ITask::OnDataReady() { DoOnDataReady(ResourcesGuard); } -bool ITask::OnError(const TBlobRange& range, const IBlobsReadingAction::TErrorStatus& status) { +bool ITask::OnError(const TString& storageId, const TBlobRange& range, const IBlobsReadingAction::TErrorStatus& status) { ACFL_DEBUG("event", "OnError")("status", status.GetStatus())("task", DebugString()); - return DoOnError(range, status); + return DoOnError(storageId, range, status); } ITask::~ITask() { AFL_VERIFY(!NActors::TlsActivationContext || DataIsReadyFlag || TaskFinishedWithError || AbortFlag || !BlobsFetchingStarted); } -THashMap ITask::ExtractBlobsData() { - AFL_VERIFY(BlobsWaiting.empty()); +TCompositeReadBlobs ITask::ExtractBlobsData() 
{ + AFL_VERIFY(AgentsWaiting.empty()); AFL_VERIFY(!ResultsExtracted); ResultsExtracted = true; - THashMap result; + TCompositeReadBlobs result; for (auto&& i : Agents) { - i->ExtractBlobsDataTo(result); + result.Add(i.second->GetStorageId(), i.second->ExtractBlobsData()); } return std::move(result); } diff --git a/ydb/core/tx/columnshard/blobs_reader/task.h b/ydb/core/tx/columnshard/blobs_reader/task.h index 65525b03f434..bc2796e9dda1 100644 --- a/ydb/core/tx/columnshard/blobs_reader/task.h +++ b/ydb/core/tx/columnshard/blobs_reader/task.h @@ -9,47 +9,106 @@ namespace NKikimr::NOlap::NBlobOperations::NRead { +class TCompositeReadBlobs { +private: + THashMap BlobsByStorage; +public: + TString DebugString() const { + TStringBuilder sb; + sb << "{"; + for (auto&& i : BlobsByStorage) { + sb << "{storage_id:" << i.first << ";blobs:" << i.second.DebugString() << "};"; + } + sb << "}"; + return sb; + } + + void Merge(TCompositeReadBlobs&& blobs) { + for (auto&& i : blobs.BlobsByStorage) { + BlobsByStorage[i.first].Merge(std::move(i.second)); + } + } + + void Clear() { + BlobsByStorage.clear(); + } + + bool IsEmpty() const { + return BlobsByStorage.empty(); + } + + THashMap::iterator begin() { + return BlobsByStorage.begin(); + } + THashMap::iterator end() { + return BlobsByStorage.end(); + } + void Add(const TString& storageId, TActionReadBlobs&& data) { + AFL_VERIFY(BlobsByStorage.emplace(storageId, std::move(data)).second); + } + void Add(const TString& storageId, const TBlobRange& blobId, TString&& value) { + BlobsByStorage[storageId].Add(blobId, std::move(value)); + } + TString Extract(const TString& storageId, const TBlobRange& range) { + auto it = BlobsByStorage.find(storageId); + AFL_VERIFY(it != BlobsByStorage.end()); + auto result = it->second.Extract(range); + if (it->second.IsEmpty()) { + BlobsByStorage.erase(it); + } + return result; + } + + ui64 GetTotalBlobsSize() const { + ui64 result = 0; + for (auto&& i : BlobsByStorage) { + result += 
i.second.GetTotalBlobsSize(); + } + return result; + } +}; + class ITask: public NColumnShard::TMonitoringObjectsCounter { private: - THashMap> BlobsWaiting; - std::vector> Agents; + THashMap> AgentsWaiting; + YDB_READONLY_DEF(TReadActionsCollection, Agents); bool BlobsFetchingStarted = false; bool TaskFinishedWithError = false; bool DataIsReadyFlag = false; const ui64 TaskIdentifier = 0; const TString ExternalTaskId; bool AbortFlag = false; - std::optional AllRangesSize; - std::optional AllRangesCount; - std::optional ReadRangesCount; TString TaskCustomer; std::shared_ptr ResourcesGuard; - ui32 BlobErrorsCount = 0; - ui32 BlobsDataCount = 0; + i64 BlobsWaitingCount = 0; bool ResultsExtracted = false; protected: bool IsFetchingStarted() const { return BlobsFetchingStarted; } - THashMap ExtractBlobsData(); + TCompositeReadBlobs ExtractBlobsData(); virtual void DoOnDataReady(const std::shared_ptr& resourcesGuard) = 0; - virtual bool DoOnError(const TBlobRange& range, const IBlobsReadingAction::TErrorStatus& status) = 0; + virtual bool DoOnError(const TString& storageId, const TBlobRange& range, const IBlobsReadingAction::TErrorStatus& status) = 0; void OnDataReady(); - bool OnError(const TBlobRange& range, const IBlobsReadingAction::TErrorStatus& status); + bool OnError(const TString& storageId, const TBlobRange& range, const IBlobsReadingAction::TErrorStatus& status); virtual TString DoDebugString() const { return ""; } public: + i64 GetWaitingRangesCount() const { + return BlobsWaitingCount; + } + void Abort() { AbortFlag = true; } bool IsFinished() const { - return BlobsWaiting.empty() && BlobsFetchingStarted; + return AgentsWaiting.empty() && BlobsFetchingStarted; } ui64 GetTaskIdentifier() const { @@ -62,43 +121,14 @@ class ITask: public NColumnShard::TMonitoringObjectsCounter { TString DebugString() const; - ui64 GetAllRangesSize() const { - Y_ABORT_UNLESS(AllRangesSize); - return *AllRangesSize; - } - - ui64 GetAllRangesCount() const { - 
Y_ABORT_UNLESS(AllRangesCount); - return *AllRangesCount; - } - - ui64 GetReadRangesCount() const { - Y_ABORT_UNLESS(ReadRangesCount); - return *ReadRangesCount; - } - - ui32 GetWaitingCount() const { - return BlobsWaiting.size(); - } - - THashSet GetExpectedRanges() const { - THashSet result; - for (auto&& i : Agents) { - i->FillExpectedRanges(result); - } - return result; - } - - const std::vector>& GetAgents() const; - virtual ~ITask(); - ITask(const std::vector>& actions, const TString& taskCustomer, const TString& externalTaskId = ""); + ITask(const TReadActionsCollection& actions, const TString& taskCustomer, const TString& externalTaskId = ""); void StartBlobsFetching(const THashSet& rangesInProgress); - bool AddError(const TBlobRange& range, const IBlobsReadingAction::TErrorStatus& status); - void AddData(const TBlobRange& range, const TString& data); + bool AddError(const TString& storageId, const TBlobRange& range, const IBlobsReadingAction::TErrorStatus& status); + void AddData(const TString& storageId, const TBlobRange& range, const TString& data); class TReadSubscriber: public NResourceBroker::NSubscribe::ITask { private: diff --git a/ydb/core/tx/columnshard/columnshard.cpp b/ydb/core/tx/columnshard/columnshard.cpp index 7af0a0e384ed..a1f93cbe7ac2 100644 --- a/ydb/core/tx/columnshard/columnshard.cpp +++ b/ydb/core/tx/columnshard/columnshard.cpp @@ -3,6 +3,11 @@ #include "hooks/abstract/abstract.h" #include "resource_subscriber/actor.h" #include "engines/writer/buffer/actor.h" +#include "engines/column_engine_logs.h" +#include "export/manager/manager.h" + +#include +#include namespace NKikimr { @@ -19,9 +24,12 @@ void TColumnShard::CleanupActors(const TActorContext& ctx) { ctx.Send(BufferizationWriteActorId, new TEvents::TEvPoisonPill); StoragesManager->Stop(); + ExportsManager->Stop(); + DataLocksManager->Stop(); if (Tiers) { Tiers->Stop(true); } + NYDBTest::TControllers::GetColumnShardController()->OnCleanupActors(TabletID()); } void 
TColumnShard::BecomeBroken(const TActorContext& ctx) { @@ -36,9 +44,8 @@ void TColumnShard::SwitchToWork(const TActorContext& ctx) { AFL_INFO(NKikimrServices::TX_COLUMNSHARD)("event", "initialize_shard")("step", "SwitchToWork"); for (auto&& i : TablesManager.GetTables()) { - ActivateTiering(i.first, i.second.GetTieringUsage(), true); + ActivateTiering(i.first, i.second.GetTieringUsage()); } - OnTieringModified(); Become(&TThis::StateWork); SignalTabletActive(ctx); @@ -46,8 +53,10 @@ void TColumnShard::SwitchToWork(const TActorContext& ctx) { TryRegisterMediatorTimeCast(); EnqueueProgressTx(ctx); } + CSCounters.OnIndexMetadataLimit(NOlap::IColumnEngine::GetMetadataLimit()); EnqueueBackgroundActivities(); ctx.Send(SelfId(), new TEvPrivate::TEvPeriodicWakeup()); + NYDBTest::TControllers::GetColumnShardController()->OnSwitchToWork(TabletID()); } void TColumnShard::OnActivateExecutor(const TActorContext& ctx) { @@ -57,6 +66,7 @@ void TColumnShard::OnActivateExecutor(const TActorContext& ctx) { Executor()->RegisterExternalTabletCounters(TabletCountersPtr.release()); const auto selfActorId = SelfId(); + StoragesManager->Initialize(); Tiers = std::make_shared(TabletID(), SelfId(), [selfActorId](const TActorContext& ctx) { AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "tiering_new_event"); @@ -70,7 +80,6 @@ void TColumnShard::OnActivateExecutor(const TActorContext& ctx) { AFL_INFO(NKikimrServices::TX_COLUMNSHARD)("event", "initialize_shard")("step", "initialize_tiring_finished"); auto& icb = *AppData(ctx)->Icb; Limits.RegisterControls(icb); - CompactionLimits.RegisterControls(icb); Settings.RegisterControls(icb); ResourceSubscribeActor = ctx.Register(new NOlap::NResourceBroker::NSubscribe::TActor(TabletID(), SelfId())); BufferizationWriteActorId = ctx.Register(new NColumnShard::NWriting::TActor(TabletID(), SelfId())); @@ -139,12 +148,14 @@ void TColumnShard::Handle(TEvPrivate::TEvReadFinished::TPtr& ev, const TActorCon Y_UNUSED(ctx); ui64 readCookie = 
ev->Get()->RequestCookie; LOG_S_DEBUG("Finished read cookie: " << readCookie << " at tablet " << TabletID()); - InFlightReadsTracker.RemoveInFlightRequest(ev->Get()->RequestCookie); + const NOlap::TVersionedIndex* index = nullptr; + if (HasIndex()) { + index = &GetIndexAs().GetVersionedIndex(); + } + InFlightReadsTracker.RemoveInFlightRequest(ev->Get()->RequestCookie, index); ui64 txId = ev->Get()->TxId; if (ScanTxInFlight.contains(txId)) { - TDuration duration = TAppData::TimeProvider->Now() - ScanTxInFlight[txId]; - IncCounter(COUNTER_SCAN_LATENCY, duration); ScanTxInFlight.erase(txId); SetCounter(COUNTER_SCAN_IN_FLY, ScanTxInFlight.size()); } @@ -214,7 +225,6 @@ void TColumnShard::UpdateIndexCounters() { auto& stats = TablesManager.MutablePrimaryIndex().GetTotalStats(); SetCounter(COUNTER_INDEX_TABLES, stats.Tables); SetCounter(COUNTER_INDEX_COLUMN_RECORDS, stats.ColumnRecords); - SetCounter(COUNTER_INDEX_COLUMN_METADATA_BYTES, stats.ColumnMetadataBytes); SetCounter(COUNTER_INSERTED_PORTIONS, stats.GetInsertedStats().Portions); SetCounter(COUNTER_INSERTED_BLOBS, stats.GetInsertedStats().Blobs); SetCounter(COUNTER_INSERTED_ROWS, stats.GetInsertedStats().Rows); @@ -247,7 +257,7 @@ void TColumnShard::UpdateIndexCounters() { << " s-compacted " << stats.GetSplitCompactedStats().DebugString() << " inactive " << stats.GetInactiveStats().DebugString() << " evicted " << stats.GetEvictedStats().DebugString() - << " column records " << stats.ColumnRecords << " meta bytes " << stats.ColumnMetadataBytes + << " column records " << stats.ColumnRecords << " at tablet " << TabletID()); } @@ -315,14 +325,6 @@ void TColumnShard::ConfigureStats(const NOlap::TColumnEngineStats& indexStats, tabletStats->SetLastUpdateTime(lastIndexUpdate.GetPlanStep()); } -TDuration TColumnShard::GetControllerPeriodicWakeupActivationPeriod() { - return NYDBTest::TControllers::GetColumnShardController()->GetPeriodicWakeupActivationPeriod(TSettings::DefaultPeriodicWakeupActivationPeriod); -} - 
-TDuration TColumnShard::GetControllerStatsReportInterval() { - return NYDBTest::TControllers::GetColumnShardController()->GetStatsReportInterval(TSettings::DefaultStatsReportInterval); -} - void TColumnShard::FillTxTableStats(::NKikimrTableStats::TTableStats* tableStats) const { tableStats->SetTxRejectedByOverload(TabletCounters->Cumulative()[COUNTER_WRITE_OVERLOAD].Get()); tableStats->SetTxRejectedBySpace(TabletCounters->Cumulative()[COUNTER_OUT_OF_SPACE].Get()); diff --git a/ydb/core/tx/columnshard/columnshard.h b/ydb/core/tx/columnshard/columnshard.h index 5e5e6f10a000..09348a4fd4ab 100644 --- a/ydb/core/tx/columnshard/columnshard.h +++ b/ydb/core/tx/columnshard/columnshard.h @@ -66,6 +66,22 @@ struct TEvColumnShard { EvWriteResult, EvReadResult, + EvDeleteSharedBlobs, + EvDeleteSharedBlobsFinished, + + EvDataSharingProposeFromInitiator, + EvDataSharingConfirmFromInitiator, + EvDataSharingAckFinishFromInitiator, + EvDataSharingStartToSource, + EvDataSharingSendDataFromSource, + EvDataSharingAckDataToSource, + EvDataSharingFinishedFromSource, + EvDataSharingAckFinishToSource, + EvDataSharingCheckStatusFromInitiator, + EvDataSharingCheckStatusResult, + EvApplyLinksModification, + EvApplyLinksModificationFinished, + EvEnd }; @@ -210,8 +226,7 @@ struct TEvColumnShard { } }; - struct TEvWriteResult : public TEventPB { + struct TEvWriteResult : public TEventPB { TEvWriteResult() = default; TEvWriteResult(ui64 origin, const NEvWrite::TWriteMeta& writeMeta, ui32 status) @@ -235,6 +250,7 @@ struct TEvColumnShard { }; using TEvScan = TEvDataShard::TEvKqpScan; + }; inline auto& Proto(TEvColumnShard::TEvProposeTransaction* ev) { diff --git a/ydb/core/tx/columnshard/columnshard__init.cpp b/ydb/core/tx/columnshard/columnshard__init.cpp index 3e11ed9e9217..c2304240381a 100644 --- a/ydb/core/tx/columnshard/columnshard__init.cpp +++ b/ydb/core/tx/columnshard/columnshard__init.cpp @@ -2,7 +2,10 @@ #include "columnshard_ttl.h" #include "columnshard_private_events.h" #include 
"columnshard_schema.h" +#include "blobs_action/storages_manager/manager.h" #include "hooks/abstract/abstract.h" +#include "engines/column_engine_logs.h" +#include "export/manager/manager.h" #include #include @@ -60,16 +63,17 @@ bool TTxInit::Precharge(TTransactionContext& txc) { ready = ready & Schema::Precharge(db, txc.DB.GetScheme()); ready = ready & Schema::Precharge(db, txc.DB.GetScheme()); - ready = ready && Schema::GetSpecialValue(db, Schema::EValueIds::CurrentSchemeShardId, Self->CurrentSchemeShardId); - ready = ready && Schema::GetSpecialValue(db, Schema::EValueIds::LastSchemaSeqNoGeneration, Self->LastSchemaSeqNo.Generation); - ready = ready && Schema::GetSpecialValue(db, Schema::EValueIds::LastSchemaSeqNoRound, Self->LastSchemaSeqNo.Round); + ready = ready && Schema::GetSpecialValueOpt(db, Schema::EValueIds::CurrentSchemeShardId, Self->CurrentSchemeShardId); + ready = ready && Schema::GetSpecialValueOpt(db, Schema::EValueIds::LastSchemaSeqNoGeneration, Self->LastSchemaSeqNo.Generation); + ready = ready && Schema::GetSpecialValueOpt(db, Schema::EValueIds::LastSchemaSeqNoRound, Self->LastSchemaSeqNo.Round); ready = ready && Schema::GetSpecialProtoValue(db, Schema::EValueIds::ProcessingParams, Self->ProcessingParams); - ready = ready && Schema::GetSpecialValue(db, Schema::EValueIds::LastWriteId, Self->LastWriteId); - ready = ready && Schema::GetSpecialValue(db, Schema::EValueIds::LastPlannedStep, Self->LastPlannedStep); - ready = ready && Schema::GetSpecialValue(db, Schema::EValueIds::LastPlannedTxId, Self->LastPlannedTxId); - ready = ready && Schema::GetSpecialValue(db, Schema::EValueIds::LastExportNumber, Self->LastExportNo); - ready = ready && Schema::GetSpecialValue(db, Schema::EValueIds::OwnerPathId, Self->OwnerPathId); - ready = ready && Schema::GetSpecialValue(db, Schema::EValueIds::OwnerPath, Self->OwnerPath); + ready = ready && Schema::GetSpecialValueOpt(db, Schema::EValueIds::LastWriteId, Self->LastWriteId); + ready = ready && 
Schema::GetSpecialValueOpt(db, Schema::EValueIds::LastPlannedStep, Self->LastPlannedStep); + ready = ready && Schema::GetSpecialValueOpt(db, Schema::EValueIds::LastPlannedTxId, Self->LastPlannedTxId); + ready = ready && Schema::GetSpecialValueOpt(db, Schema::EValueIds::LastExportNumber, Self->LastExportNo); + ready = ready && Schema::GetSpecialValueOpt(db, Schema::EValueIds::OwnerPathId, Self->OwnerPathId); + ready = ready && Schema::GetSpecialValueOpt(db, Schema::EValueIds::OwnerPath, Self->OwnerPath); + if (!ready) { return false; @@ -86,53 +90,53 @@ bool TTxInit::ReadEverything(TTransactionContext& txc, const TActorContext& ctx) TBlobGroupSelector dsGroupSelector(Self->Info()); NOlap::TDbWrapper dbTable(txc.DB, &dsGroupSelector); { - ACFL_INFO("step", "TInsertTable::Load_Start"); + ACFL_DEBUG("step", "TInsertTable::Load_Start"); TMemoryProfileGuard g("TTxInit/InsertTable"); auto localInsertTable = std::make_unique(); if (!localInsertTable->Load(dbTable, TAppData::TimeProvider->Now())) { ACFL_ERROR("step", "TInsertTable::Load_Fails"); return false; } - ACFL_INFO("step", "TInsertTable::Load_Finish"); + ACFL_DEBUG("step", "TInsertTable::Load_Finish"); Self->InsertTable.swap(localInsertTable); } { - ACFL_INFO("step", "TTxController::Load_Start"); + ACFL_DEBUG("step", "TTxController::Load_Start"); TMemoryProfileGuard g("TTxInit/TTxController"); auto localTxController = std::make_unique(*Self); if (!localTxController->Load(txc)) { ACFL_ERROR("step", "TTxController::Load_Fails"); return false; } - ACFL_INFO("step", "TTxController::Load_Finish"); + ACFL_DEBUG("step", "TTxController::Load_Finish"); Self->ProgressTxController.swap(localTxController); } { - ACFL_INFO("step", "TOperationsManager::Load_Start"); + ACFL_DEBUG("step", "TOperationsManager::Load_Start"); TMemoryProfileGuard g("TTxInit/TOperationsManager"); auto localOperationsManager = std::make_unique(); if (!localOperationsManager->Load(txc)) { ACFL_ERROR("step", "TOperationsManager::Load_Fails"); return false; 
} - ACFL_INFO("step", "TOperationsManager::Load_Finish"); + ACFL_DEBUG("step", "TOperationsManager::Load_Finish"); Self->OperationsManager.swap(localOperationsManager); } { - TBlobManagerDb blobManagerDb(txc.DB); - TMemoryProfileGuard g("TTxInit/StoragesManager"); - for (auto&& i : Self->StoragesManager->GetStorages()) { - if (!i.second->Load(blobManagerDb)) { - ACFL_ERROR("event", "storages manager load")("storage", i.first); - return false; - } + ACFL_DEBUG("step", "TStoragesManager::Load_Start"); + AFL_VERIFY(Self->StoragesManager); + TMemoryProfileGuard g("TTxInit/NDataSharing::TStoragesManager"); + if (!Self->StoragesManager->LoadIdempotency(txc.DB)) { + return false; } + ACFL_DEBUG("step", "TStoragesManager::Load_Finish"); } + { - ACFL_INFO("step", "TTablesManager::Load_Start"); + ACFL_DEBUG("step", "TTablesManager::Load_Start"); TTablesManager tManagerLocal(Self->StoragesManager, Self->TabletID()); { TMemoryProfileGuard g("TTxInit/TTablesManager"); @@ -153,7 +157,7 @@ bool TTxInit::ReadEverything(TTransactionContext& txc, const TActorContext& ctx) Self->SetCounter(COUNTER_TABLES, Self->TablesManager.GetTables().size()); Self->SetCounter(COUNTER_TABLE_PRESETS, Self->TablesManager.GetSchemaPresets().size()); Self->SetCounter(COUNTER_TABLE_TTLS, Self->TablesManager.GetTtl().PathsCount()); - ACFL_INFO("step", "TTablesManager::Load_Finish"); + ACFL_DEBUG("step", "TTablesManager::Load_Finish"); } { @@ -178,6 +182,24 @@ bool TTxInit::ReadEverything(TTransactionContext& txc, const TActorContext& ctx) } } + { + TMemoryProfileGuard g("TTxInit/NDataSharing::TExportsManager"); + auto local = std::make_shared(); + if (!local->Load(txc.DB)) { + return false; + } + Self->ExportsManager = local; + } + + { + TMemoryProfileGuard g("TTxInit/NDataSharing::TSessionsManager"); + auto local = std::make_shared(); + if (!local->Load(txc.DB, Self->TablesManager.GetPrimaryIndexAsOptional())) { + return false; + } + Self->SharingSessionsManager = local; + } + 
Self->UpdateInsertTableCounters(); Self->UpdateIndexCounters(); Self->UpdateResourceMetrics(ctx, {}); @@ -207,6 +229,7 @@ bool TTxInit::Execute(TTransactionContext& txc, const TActorContext& ctx) { void TTxInit::Complete(const TActorContext& ctx) { Self->ProgressTxController->OnTabletInit(); Self->SwitchToWork(ctx); + NYDBTest::TControllers::GetColumnShardController()->OnTabletInitCompleted(*Self); } class TTxUpdateSchema : public TTransactionBase { @@ -231,8 +254,11 @@ bool TTxUpdateSchema::Execute(TTransactionContext& txc, const TActorContext&) { if (result.IsSuccess()) { NormalizerTasks = result.DetachResult(); if (!NormalizerTasks.empty()) { + ACFL_WARN("normalizer_controller", Self->NormalizerController.DebugString())("tasks_count", NormalizerTasks.size()); break; } + NIceDb::TNiceDb db(txc.DB); + Self->NormalizerController.UpdateControllerState(db); Self->NormalizerController.SwitchNormalizer(); } else { Self->NormalizerController.GetCounters().OnNormalizerFails(); @@ -253,7 +279,7 @@ void TTxUpdateSchema::Complete(const TActorContext& ctx) { } NOlap::TNormalizationContext nCtx; - nCtx.SetColumnshardActor(Self->SelfId()); + nCtx.SetShardActor(Self->SelfId()); nCtx.SetResourceSubscribeActor(Self->ResourceSubscribeActor); for (auto&& task : NormalizerTasks) { @@ -280,14 +306,24 @@ class TTxApplyNormalizer : public TTransactionBase { bool TTxApplyNormalizer::Execute(TTransactionContext& txc, const TActorContext&) { NActors::TLogContextGuard gLogging = NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("tablet_id", Self->TabletID())("event", "initialize_shard"); AFL_INFO(NKikimrServices::TX_COLUMNSHARD)("step", "TTxApplyNormalizer.Execute")("details", Self->NormalizerController.DebugString()); - return Changes->Apply(txc, Self->NormalizerController); + if (!Changes->ApplyOnExecute(txc, Self->NormalizerController)) { + return false; + } + + if (Self->NormalizerController.GetNormalizer()->GetActiveTasksCount() == 1) { + NIceDb::TNiceDb db(txc.DB); 
+ Self->NormalizerController.UpdateControllerState(db); + } + return true; } void TTxApplyNormalizer::Complete(const TActorContext& ctx) { AFL_INFO(NKikimrServices::TX_COLUMNSHARD)("step", "TTxApplyNormalizer.Complete")("tablet_id", Self->TabletID())("event", "initialize_shard"); AFL_VERIFY(!Self->NormalizerController.IsNormalizationFinished())("details", Self->NormalizerController.DebugString()); + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("tablet_id", Self->TabletID())("event", "apply_normalizer_changes")("details", Self->NormalizerController.DebugString())("size", Changes->GetSize()); + Changes->ApplyOnComplete(Self->NormalizerController); Self->NormalizerController.GetNormalizer()->OnResultReady(); - if (Self->NormalizerController.GetNormalizer()->WaitResult()) { + if (Self->NormalizerController.GetNormalizer()->HasActiveTasks()) { return; } @@ -328,6 +364,9 @@ bool TTxInitSchema::Execute(TTransactionContext& txc, const TActorContext&) { } } + // NIceDb::TNiceDb db(txc.DB); + // Self->NormalizerController.InitControllerState(db); + // Enable compression for the SmallBlobs table const auto* smallBlobsDefaultColumnFamily = txc.DB.GetScheme().DefaultFamilyFor(Schema::SmallBlobs::TableId); if (!smallBlobsDefaultColumnFamily || @@ -352,7 +391,7 @@ bool TTxInitSchema::Execute(TTransactionContext& txc, const TActorContext&) { } void TTxInitSchema::Complete(const TActorContext& ctx) { - LOG_S_DEBUG("TxInitSchema.Complete at tablet " << Self->TabletID();) + LOG_S_DEBUG("TxInitSchema.Complete at tablet " << Self->TabletID();); Self->Execute(new TTxUpdateSchema(Self), ctx); } diff --git a/ydb/core/tx/columnshard/columnshard__progress_tx.cpp b/ydb/core/tx/columnshard/columnshard__progress_tx.cpp index 42597d49275d..35d867caf4e4 100644 --- a/ydb/core/tx/columnshard/columnshard__progress_tx.cpp +++ b/ydb/core/tx/columnshard/columnshard__progress_tx.cpp @@ -18,6 +18,7 @@ class TColumnShard::TTxProgressTx : public TTransactionBase { bool Execute(TTransactionContext& txc, const 
TActorContext& ctx) override { NActors::TLogContextGuard logGuard = NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("tablet_id", Self->TabletID())("tx_state", "execute"); Y_ABORT_UNLESS(Self->ProgressTxInFlight); + Self->TabletCounters->Simple()[COUNTER_TX_COMPLETE_LAG].Set(Self->GetTxCompleteLag().MilliSeconds()); size_t removedCount = Self->ProgressTxController->CleanExpiredTxs(txc); if (removedCount > 0) { @@ -29,6 +30,7 @@ class TColumnShard::TTxProgressTx : public TTransactionBase { // Process a single transaction at the front of the queue auto plannedItem = Self->ProgressTxController->StartPlannedTx(); if (!!plannedItem) { + PlannedQueueItem.emplace(plannedItem->PlanStep, plannedItem->TxId); ui64 step = plannedItem->PlanStep; ui64 txId = plannedItem->TxId; @@ -50,12 +52,16 @@ class TColumnShard::TTxProgressTx : public TTransactionBase { if (TxOperator) { TxOperator->Complete(*Self, ctx); } + if (PlannedQueueItem) { + Self->GetProgressTxController().CompleteRunningTx(*PlannedQueueItem); + } Self->SetupIndexation(); } private: TTxController::ITransactionOperatior::TPtr TxOperator; const ui32 TabletTxNo; + std::optional PlannedQueueItem; }; void TColumnShard::EnqueueProgressTx(const TActorContext& ctx) { diff --git a/ydb/core/tx/columnshard/columnshard__propose_transaction.cpp b/ydb/core/tx/columnshard/columnshard__propose_transaction.cpp index 4f04f60ff8d1..3688e74126ed 100644 --- a/ydb/core/tx/columnshard/columnshard__propose_transaction.cpp +++ b/ydb/core/tx/columnshard/columnshard__propose_transaction.cpp @@ -3,17 +3,17 @@ #include "columnshard_schema.h" #include #include +#include namespace NKikimr::NColumnShard { using namespace NTabletFlatExecutor; -class TTxProposeTransaction : public NTabletFlatExecutor::TTransactionBase { +class TTxProposeTransaction : public TProposeTransactionBase { public: TTxProposeTransaction(TColumnShard* self, TEvColumnShard::TEvProposeTransaction::TPtr& ev) - : TBase(self) + : TProposeTransactionBase(self) , 
Ev(ev) - , TabletTxNo(++Self->TabletTxCounter) {} bool Execute(TTransactionContext& txc, const TActorContext& ctx) override; @@ -22,25 +22,16 @@ class TTxProposeTransaction : public NTabletFlatExecutor::TTransactionBase Result; - TStringBuilder TxPrefix() const { - return TStringBuilder() << "TxProposeTransaction[" << ToString(TabletTxNo) << "] "; - } - - TString TxSuffix() const { - return TStringBuilder() << " at tablet " << Self->TabletID(); - } - - void ConstructResult(TTxController::TProposeResult& proposeResult, const TTxController::TBasicTxInfo& txInfo); + void OnProposeResult(TTxController::TProposeResult& proposeResult, const TTxController::TTxInfo& txInfo) override; + void OnProposeError(TTxController::TProposeResult& proposeResult, const TTxController::TBasicTxInfo& txInfo) override; TTxController::TProposeResult ProposeTtlDeprecated(const TString& txBody); }; bool TTxProposeTransaction::Execute(TTransactionContext& txc, const TActorContext& /*ctx*/) { Y_ABORT_UNLESS(Ev); - LOG_S_DEBUG(TxPrefix() << "execute" << TxSuffix()); txc.DB.NoMoreReadsForTx(); NIceDb::TNiceDb db(txc.DB); @@ -48,9 +39,9 @@ bool TTxProposeTransaction::Execute(TTransactionContext& txc, const TActorContex Self->IncCounter(COUNTER_PREPARE_REQUEST); auto& record = Proto(Ev->Get()); - auto txKind = record.GetTxKind(); - ui64 txId = record.GetTxId(); - auto& txBody = record.GetTxBody(); + const auto txKind = record.GetTxKind(); + const ui64 txId = record.GetTxId(); + const auto& txBody = record.GetTxBody(); if (txKind == NKikimrTxColumnShard::TX_KIND_TTL) { auto proposeResult = ProposeTtlDeprecated(txBody); @@ -71,39 +62,7 @@ bool TTxProposeTransaction::Execute(TTransactionContext& txc, const TActorContex Y_ABORT_UNLESS(Self->CurrentSchemeShardId == record.GetSchemeShardId()); } } - - TTxController::TBasicTxInfo fakeTxInfo; - fakeTxInfo.TxId = txId; - fakeTxInfo.TxKind = txKind; - - auto txOperator = TTxController::ITransactionOperatior::TFactory::MakeHolder(txKind, fakeTxInfo); - if 
(!txOperator || !txOperator->Parse(txBody)) { - TTxController::TProposeResult proposeResult(NKikimrTxColumnShard::EResultStatus::ERROR, TStringBuilder() << "Error processing commit TxId# " << txId - << (txOperator ? ". Parsing error " : ". Unknown operator for txKind")); - ConstructResult(proposeResult, fakeTxInfo); - return true; - } - - auto txInfoPtr = Self->ProgressTxController->GetTxInfo(txId); - if (!!txInfoPtr) { - if (txInfoPtr->Source != Ev->Get()->GetSource() || txInfoPtr->Cookie != Ev->Cookie) { - TTxController::TProposeResult proposeResult(NKikimrTxColumnShard::EResultStatus::ERROR, TStringBuilder() << "Another commit TxId# " << txId << " has already been proposed"); - ConstructResult(proposeResult, fakeTxInfo); - } - TTxController::TProposeResult proposeResult; - ConstructResult(proposeResult, *txInfoPtr); - } else { - auto proposeResult = txOperator->Propose(*Self, txc, false); - if (!!proposeResult) { - const auto& txInfo = txOperator->TxWithDeadline() ? Self->ProgressTxController->RegisterTxWithDeadline(txId, txKind, txBody, Ev->Get()->GetSource(), Ev->Cookie, txc) - : Self->ProgressTxController->RegisterTx(txId, txKind, txBody, Ev->Get()->GetSource(), Ev->Cookie, txc); - - ConstructResult(proposeResult, txInfo); - } else { - ConstructResult(proposeResult, fakeTxInfo); - } - } - AFL_VERIFY(!!Result); + ProposeTransaction(TTxController::TBasicTxInfo(txKind, txId), txBody, Ev->Get()->GetSource(), Ev->Cookie, txc); return true; } @@ -143,32 +102,33 @@ TTxController::TProposeResult TTxProposeTransaction::ProposeTtlDeprecated(const const TInstant now = TlsActivationContext ? 
AppData()->TimeProvider->Now() : TInstant::Now(); for (ui64 pathId : ttlBody.GetPathIds()) { NOlap::TTiering tiering; - tiering.Ttl = NOlap::TTierInfo::MakeTtl(now - unixTime, columnName); + AFL_VERIFY(tiering.Add(NOlap::TTierInfo::MakeTtl(now - unixTime, columnName))); pathTtls.emplace(pathId, std::move(tiering)); } } - if (!Self->SetupTtl(pathTtls, true)) { + if (!Self->SetupTtl(pathTtls)) { return TTxController::TProposeResult(NKikimrTxColumnShard::EResultStatus::SCHEMA_ERROR, "TTL not started"); } + Self->TablesManager.MutablePrimaryIndex().OnTieringModified(Self->Tiers, Self->TablesManager.GetTtl(), {}); return TTxController::TProposeResult(); } -void TTxProposeTransaction::ConstructResult(TTxController::TProposeResult& proposeResult, const TTxController::TBasicTxInfo& txInfo) { +void TTxProposeTransaction::OnProposeError(TTxController::TProposeResult& proposeResult, const TTxController::TBasicTxInfo& txInfo) { Result = std::make_unique(Self->TabletID(), txInfo.TxKind, txInfo.TxId, proposeResult.GetStatus(), proposeResult.GetStatusMessage()); - if (proposeResult.GetStatus() == NKikimrTxColumnShard::EResultStatus::PREPARED) { - Self->IncCounter(COUNTER_PREPARE_SUCCESS); - Result->Record.SetMinStep(txInfo.MinStep); - Result->Record.SetMaxStep(txInfo.MaxStep); - if (Self->ProcessingParams) { - Result->Record.MutableDomainCoordinators()->CopyFrom(Self->ProcessingParams->GetCoordinators()); - } - } else if (proposeResult.GetStatus() == NKikimrTxColumnShard::EResultStatus::SUCCESS) { - Self->IncCounter(COUNTER_PREPARE_SUCCESS); - } else { - Self->IncCounter(COUNTER_PREPARE_ERROR); - LOG_S_INFO(TxPrefix() << "error txId " << txInfo.TxId << " " << proposeResult.GetStatusMessage() << TxSuffix()); + Self->IncCounter(COUNTER_PREPARE_ERROR); + AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("message", proposeResult.GetStatusMessage())("tablet_id", Self->TabletID())("tx_id", txInfo.TxId); +} + +void TTxProposeTransaction::OnProposeResult(TTxController::TProposeResult& 
proposeResult, const TTxController::TTxInfo& txInfo) { + AFL_VERIFY(proposeResult.GetStatus() == NKikimrTxColumnShard::EResultStatus::PREPARED)("tx_id", txInfo.TxId)("details", proposeResult.DebugString()); + Result = std::make_unique(Self->TabletID(), txInfo.TxKind, txInfo.TxId, proposeResult.GetStatus(), proposeResult.GetStatusMessage()); + Result->Record.SetMinStep(txInfo.MinStep); + Result->Record.SetMaxStep(txInfo.MaxStep); + if (Self->ProcessingParams) { + Result->Record.MutableDomainCoordinators()->CopyFrom(Self->ProcessingParams->GetCoordinators()); } + Self->IncCounter(COUNTER_PREPARE_SUCCESS); } void TTxProposeTransaction::Complete(const TActorContext& ctx) { @@ -180,12 +140,6 @@ void TTxProposeTransaction::Complete(const TActorContext& ctx) { void TColumnShard::Handle(TEvColumnShard::TEvProposeTransaction::TPtr& ev, const TActorContext& ctx) { - auto& record = Proto(ev->Get()); - auto txKind = record.GetTxKind(); - ui64 txId = record.GetTxId(); - LOG_S_DEBUG("ProposeTransaction " << NKikimrTxColumnShard::ETransactionKind_Name(txKind) - << " txId " << txId << " at tablet " << TabletID()); - Execute(new TTxProposeTransaction(this, ev), ctx); } diff --git a/ydb/core/tx/columnshard/columnshard__read_base.h b/ydb/core/tx/columnshard/columnshard__read_base.h deleted file mode 100644 index 15347cba87d0..000000000000 --- a/ydb/core/tx/columnshard/columnshard__read_base.h +++ /dev/null @@ -1,32 +0,0 @@ -#pragma once -#include "engines/reader/description.h" -#include - -namespace NKikimr::NColumnShard { - -/// Read portion of data in OLAP transaction -class TTxReadBase : public TTransactionBase { -protected: - explicit TTxReadBase(TColumnShard* self) - : TBase(self) - {} - - std::shared_ptr PrepareReadMetadata( - const NOlap::TReadDescription& readDescription, - const std::unique_ptr& insertTable, - const std::unique_ptr& index, - TString& error, const bool isReverse) const; - -protected: - bool ParseProgram( - NKikimrSchemeOp::EOlapProgramType programType, - 
TString serializedProgram, - NOlap::TReadDescription& read, - const NOlap::IColumnResolver& columnResolver - ); - -protected: - TString ErrorDescription; -}; - -} diff --git a/ydb/core/tx/columnshard/columnshard__scan.cpp b/ydb/core/tx/columnshard/columnshard__scan.cpp index b881f6962cfa..9e00756d2ede 100644 --- a/ydb/core/tx/columnshard/columnshard__scan.cpp +++ b/ydb/core/tx/columnshard/columnshard__scan.cpp @@ -1,877 +1,10 @@ -#include "blobs_reader/actor.h" -#include "blobs_reader/events.h" -#include "blobs_reader/read_coordinator.h" -#include "engines/reader/read_context.h" -#include "resource_subscriber/actor.h" - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include - -#include +#include "columnshard__scan.h" +#include "columnshard.h" +#include "columnshard_impl.h" +#include "engines/reader/transaction/tx_scan.h" namespace NKikimr::NColumnShard { -using namespace NKqp; -using NBlobCache::TBlobRange; - -class TTxScan: public TTxReadBase { -public: - using TReadMetadataPtr = NOlap::TReadMetadataBase::TConstPtr; - - TTxScan(TColumnShard* self, TEvColumnShard::TEvScan::TPtr& ev) - : TTxReadBase(self) - , Ev(ev) { - } - - bool Execute(TTransactionContext& txc, const TActorContext& ctx) override; - void Complete(const TActorContext& ctx) override; - TTxType GetTxType() const override { return TXTYPE_START_SCAN; } - -private: - std::shared_ptr CreateReadMetadata(NOlap::TReadDescription& read, - bool isIndexStats, bool isReverse, ui64 limit); - -private: - TEvColumnShard::TEvScan::TPtr Ev; - std::vector ReadMetadataRanges; -}; - - -constexpr i64 DEFAULT_READ_AHEAD_BYTES = (i64)2 * 1024 * 1024 * 1024; -constexpr TDuration SCAN_HARD_TIMEOUT = TDuration::Minutes(10); -constexpr TDuration SCAN_HARD_TIMEOUT_GAP = TDuration::Seconds(5); - -class TColumnShardScan : public TActorBootstrapped, NArrow::IRowWriter { 
-private: - std::shared_ptr MemoryAccessor; - TActorId ResourceSubscribeActorId; - TActorId ReadCoordinatorActorId; - const std::shared_ptr StoragesManager; -public: - static constexpr auto ActorActivityType() { - return NKikimrServices::TActivity::KQP_OLAP_SCAN; - } - -public: - virtual void PassAway() override { - Send(ResourceSubscribeActorId, new TEvents::TEvPoisonPill); - Send(ReadCoordinatorActorId, new TEvents::TEvPoisonPill); - IActor::PassAway(); - } - - TColumnShardScan(const TActorId& columnShardActorId, const TActorId& scanComputeActorId, - const std::shared_ptr& storagesManager, const NOlap::TComputeShardingPolicy& computeShardingPolicy, - ui32 scanId, ui64 txId, ui32 scanGen, ui64 requestCookie, - ui64 tabletId, TDuration timeout, std::vector&& readMetadataList, - NKikimrDataEvents::EDataFormat dataFormat, const TScanCounters& scanCountersPool) - : StoragesManager(storagesManager) - , ColumnShardActorId(columnShardActorId) - , ScanComputeActorId(scanComputeActorId) - , BlobCacheActorId(NBlobCache::MakeBlobCacheServiceId()) - , ScanId(scanId) - , TxId(txId) - , ScanGen(scanGen) - , RequestCookie(requestCookie) - , DataFormat(dataFormat) - , TabletId(tabletId) - , ReadMetadataRanges(std::move(readMetadataList)) - , ReadMetadataIndex(0) - , Deadline(TInstant::Now() + (timeout ? 
timeout + SCAN_HARD_TIMEOUT_GAP : SCAN_HARD_TIMEOUT)) - , ScanCountersPool(scanCountersPool) - , Stats(NTracing::TTraceClient::GetLocalClient("SHARD", ::ToString(TabletId)/*, "SCAN_TXID:" + ::ToString(TxId)*/)) - , ComputeShardingPolicy(computeShardingPolicy) - { - AFL_VERIFY(ReadMetadataRanges.size() == 1); - KeyYqlSchema = ReadMetadataRanges[ReadMetadataIndex]->GetKeyYqlSchema(); - } - - void Bootstrap(const TActorContext& ctx) { - TLogContextGuard gLogging(NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD_SCAN) - ("SelfId", SelfId())("TabletId", TabletId)("ScanId", ScanId)("TxId", TxId)("ScanGen", ScanGen) - ); - auto g = Stats->MakeGuard("bootstrap"); - ScanActorId = ctx.SelfID; - Schedule(Deadline, new TEvents::TEvWakeup); - - Y_ABORT_UNLESS(!ScanIterator); - MemoryAccessor = std::make_shared(SelfId(), "CSScan/Result"); - ResourceSubscribeActorId = ctx.Register(new NOlap::NResourceBroker::NSubscribe::TActor(TabletId, SelfId())); - ReadCoordinatorActorId = ctx.Register(new NOlap::NBlobOperations::NRead::TReadCoordinatorActor(TabletId, SelfId())); - - std::shared_ptr context = std::make_shared(StoragesManager, ScanCountersPool, - ReadMetadataRanges[ReadMetadataIndex], SelfId(), ResourceSubscribeActorId, ReadCoordinatorActorId, ComputeShardingPolicy); - ScanIterator = ReadMetadataRanges[ReadMetadataIndex]->StartScan(context); - - // propagate self actor id // TODO: FlagSubscribeOnSession ? 
- Send(ScanComputeActorId, new TEvKqpCompute::TEvScanInitActor(ScanId, ctx.SelfID, ScanGen, TabletId), IEventHandle::FlagTrackDelivery); - - Become(&TColumnShardScan::StateScan); - ContinueProcessing(); - } - -private: - STATEFN(StateScan) { - auto g = Stats->MakeGuard("processing"); - TLogContextGuard gLogging(NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD_SCAN) - ("SelfId", SelfId())("TabletId", TabletId)("ScanId", ScanId)("TxId", TxId)("ScanGen", ScanGen) - ); - switch (ev->GetTypeRewrite()) { - hFunc(TEvKqpCompute::TEvScanDataAck, HandleScan); - hFunc(TEvKqp::TEvAbortExecution, HandleScan); - hFunc(TEvents::TEvUndelivered, HandleScan); - hFunc(TEvents::TEvWakeup, HandleScan); - hFunc(NConveyor::TEvExecution::TEvTaskProcessedResult, HandleScan); - default: - AFL_VERIFY(false)("unexpected_event", ev->GetTypeName()); - } - } - - bool ReadNextBlob() { - while (ScanIterator->ReadNextInterval()) { - } - return true; - } - - void HandleScan(NConveyor::TEvExecution::TEvTaskProcessedResult::TPtr& ev) { - --InFlightReads; - auto g = Stats->MakeGuard("task_result"); - if (ev->Get()->GetErrorMessage()) { - ACFL_ERROR("event", "TEvTaskProcessedResult")("error", ev->Get()->GetErrorMessage()); - SendScanError("task_error:" + ev->Get()->GetErrorMessage()); - Finish(); - } else { - ACFL_DEBUG("event", "TEvTaskProcessedResult"); - auto t = static_pointer_cast(ev->Get()->GetResult()); - Y_DEBUG_ABORT_UNLESS(dynamic_pointer_cast(ev->Get()->GetResult())); - if (!ScanIterator->Finished()) { - ScanIterator->Apply(t); - } - } - ContinueProcessing(); - } - - void HandleScan(TEvKqpCompute::TEvScanDataAck::TPtr& ev) { - auto g = Stats->MakeGuard("ack"); - Y_ABORT_UNLESS(!AckReceivedInstant); - AckReceivedInstant = TMonotonic::Now(); - - Y_ABORT_UNLESS(ev->Get()->Generation == ScanGen); - - ChunksLimiter = TChunksLimiter(ev->Get()->FreeSpace, ev->Get()->MaxChunksCount); - Y_ABORT_UNLESS(ev->Get()->MaxChunksCount == 1); - ACFL_DEBUG("event", "TEvScanDataAck")("info", 
ChunksLimiter.DebugString()); - if (ScanIterator) { - if (!!ScanIterator->GetAvailableResultsCount() && !*ScanIterator->GetAvailableResultsCount()) { - ScanCountersPool.OnEmptyAck(); - } else { - ScanCountersPool.OnNotEmptyAck(); - } - } - ContinueProcessing(); - } - - // Returns true if it was able to produce new batch - bool ProduceResults() noexcept { - auto g = Stats->MakeGuard("ProduceResults"); - TLogContextGuard gLogging(NActors::TLogContextBuilder::Build()("method", "produce result")); - - ACFL_DEBUG("stage", "start")("iterator", ScanIterator->DebugString()); - Y_ABORT_UNLESS(!Finished); - Y_ABORT_UNLESS(ScanIterator); - - if (ScanIterator->Finished()) { - ACFL_DEBUG("stage", "scan iterator is finished")("iterator", ScanIterator->DebugString()); - return false; - } - - if (!ChunksLimiter.HasMore()) { - ScanIterator->PrepareResults(); - ACFL_DEBUG("stage", "bytes limit exhausted")("limit", ChunksLimiter.DebugString()); - return false; - } - - auto resultOpt = ScanIterator->GetBatch(); - if (!resultOpt) { - ACFL_DEBUG("stage", "no data is ready yet")("iterator", ScanIterator->DebugString()); - return false; - } - auto& result = *resultOpt; - if (!result.ErrorString.empty()) { - ACFL_ERROR("stage", "got error")("iterator", ScanIterator->DebugString())("message", result.ErrorString); - SendAbortExecution(TString(result.ErrorString.data(), result.ErrorString.size())); - - ScanIterator.reset(); - Finish(); - return false; - } - - if (!result.GetRecordsCount()) { - ACFL_DEBUG("stage", "got empty batch")("iterator", ScanIterator->DebugString()); - return true; - } - - auto& shardedBatch = result.GetShardedBatch(); - auto batch = shardedBatch.GetRecordBatch(); - int numRows = batch->num_rows(); - int numColumns = batch->num_columns(); - ACFL_DEBUG("stage", "ready result")("iterator", ScanIterator->DebugString())("columns", numColumns)("rows", result.GetRecordsCount()); - - AFL_VERIFY(DataFormat == NKikimrDataEvents::FORMAT_ARROW); - { - MakeResult(0); - if 
(shardedBatch.IsSharded()) { - AFL_INFO(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "compute_sharding_success")("count", shardedBatch.GetSplittedByShards().size())("info", ComputeShardingPolicy.DebugString()); - Result->SplittedBatches = shardedBatch.GetSplittedByShards(); - Result->ArrowBatch = shardedBatch.GetRecordBatch(); - } else { - if (ComputeShardingPolicy.IsEnabled()) { - AFL_WARN(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "compute_sharding_problems")("info", ComputeShardingPolicy.DebugString()); - } - Result->ArrowBatch = shardedBatch.GetRecordBatch(); - } - Rows += batch->num_rows(); - Bytes += NArrow::GetBatchDataSize(batch); - ACFL_DEBUG("stage", "data_format")("batch_size", NArrow::GetBatchDataSize(batch))("num_rows", numRows)("batch_columns", JoinSeq(",", batch->schema()->field_names())); - } - if (CurrentLastReadKey && ReadMetadataRanges.size() == 1) { - NOlap::NIndexedReader::TSortableBatchPosition pNew(result.GetLastReadKey(), 0, result.GetLastReadKey()->schema()->field_names(), {}, ReadMetadataRanges.front()->IsDescSorted()); - NOlap::NIndexedReader::TSortableBatchPosition pOld(CurrentLastReadKey, 0, CurrentLastReadKey->schema()->field_names(), {}, ReadMetadataRanges.front()->IsDescSorted()); - AFL_VERIFY(pOld < pNew)("old", pOld.DebugJson().GetStringRobust())("new", pNew.DebugJson().GetStringRobust()); - } - CurrentLastReadKey = result.GetLastReadKey(); - - Result->LastKey = ConvertLastKey(result.GetLastReadKey()); - SendResult(false, false); - ACFL_DEBUG("stage", "finished")("iterator", ScanIterator->DebugString()); - return true; - } - - void ContinueProcessingStep() { - if (!ScanIterator) { - ACFL_DEBUG("event", "ContinueProcessingStep")("stage", "iterator is not initialized"); - return; - } - const bool hasAck = !!AckReceivedInstant; - // Send new results if there is available capacity - while (ScanIterator && ProduceResults()) { - } - - // Switch to the next range if the current one is finished - if (ScanIterator && 
ScanIterator->Finished() && hasAck) { - NextReadMetadata(); - } - - if (ScanIterator) { - // Make read-ahead requests for the subsequent blobs - ReadNextBlob(); - } - } - - void ContinueProcessing() { - const i64 maxSteps = ReadMetadataRanges.size(); - for (i64 step = 0; step <= maxSteps; ++step) { - ContinueProcessingStep(); - if (!ScanIterator || !ChunksLimiter.HasMore() || InFlightReads || MemoryAccessor->InWaiting() || ScanCountersPool.InWaiting()) { - return; - } - } - ScanCountersPool.Hanging->Add(1); - // The loop has finished without any progress! - LOG_ERROR_S(*TlsActivationContext, NKikimrServices::TX_COLUMNSHARD_SCAN, - "Scan " << ScanActorId << " is hanging" - << " txId: " << TxId << " scanId: " << ScanId << " gen: " << ScanGen << " tablet: " << TabletId << " debug: " << ScanIterator->DebugString()); - Y_DEBUG_ABORT_UNLESS(false); - } - - void HandleScan(TEvKqp::TEvAbortExecution::TPtr& ev) noexcept { - auto& msg = ev->Get()->Record; - const TString reason = ev->Get()->GetIssues().ToOneLineString(); - - auto prio = msg.GetStatusCode() == NYql::NDqProto::StatusIds::SUCCESS ? 
NActors::NLog::PRI_DEBUG : NActors::NLog::PRI_WARN; - LOG_LOG_S(*TlsActivationContext, prio, NKikimrServices::TX_COLUMNSHARD_SCAN, - "Scan " << ScanActorId << " got AbortExecution" - << " txId: " << TxId << " scanId: " << ScanId << " gen: " << ScanGen << " tablet: " << TabletId - << " code: " << NYql::NDqProto::StatusIds_StatusCode_Name(msg.GetStatusCode()) - << " reason: " << reason); - - AbortReason = std::move(reason); - Finish(); - } - - void HandleScan(TEvents::TEvUndelivered::TPtr& ev) { - ui32 eventType = ev->Get()->SourceType; - switch (eventType) { - case TEvKqpCompute::TEvScanInitActor::EventType: - AbortReason = "init failed"; - break; - case TEvKqpCompute::TEvScanData::EventType: - AbortReason = "failed to send data batch"; - break; - } - - LOG_WARN_S(*TlsActivationContext, NKikimrServices::TX_COLUMNSHARD_SCAN, - "Scan " << ScanActorId << " undelivered event: " << eventType - << " txId: " << TxId << " scanId: " << ScanId << " gen: " << ScanGen << " tablet: " << TabletId - << " reason: " << ev->Get()->Reason - << " description: " << AbortReason); - - Finish(); - } - - void HandleScan(TEvents::TEvWakeup::TPtr& /*ev*/) { - LOG_ERROR_S(*TlsActivationContext, NKikimrServices::TX_COLUMNSHARD_SCAN, - "Scan " << ScanActorId << " guard execution timeout" - << " txId: " << TxId << " scanId: " << ScanId << " gen: " << ScanGen << " tablet: " << TabletId); - - Finish(); - } - -private: - void MakeResult(size_t reserveRows = 0) { - if (!Finished && !Result) { - Result = MakeHolder(ScanId, ScanGen); - if (reserveRows) { - Y_ABORT_UNLESS(DataFormat != NKikimrDataEvents::FORMAT_ARROW); - Result->Rows.reserve(reserveRows); - } - } - } - - void NextReadMetadata() { - auto g = Stats->MakeGuard("NextReadMetadata"); - if (++ReadMetadataIndex == ReadMetadataRanges.size()) { - // Send empty batch with "finished" flag - MakeResult(); - SendResult(false, true); - ScanIterator.reset(); - return Finish(); - } - - auto context = std::make_shared(StoragesManager, ScanCountersPool, 
ReadMetadataRanges[ReadMetadataIndex], SelfId(), - ResourceSubscribeActorId, ReadCoordinatorActorId, ComputeShardingPolicy); - ScanIterator = ReadMetadataRanges[ReadMetadataIndex]->StartScan(context); - } - - void AddRow(const TConstArrayRef& row) override { - Result->Rows.emplace_back(TOwnedCellVec::Make(row)); - ++Rows; - - // NOTE: Some per-row overhead to deal with the case when no columns were requested - Bytes += std::max((ui64)8, (ui64)Result->Rows.back().DataSize()); - } - - TOwnedCellVec ConvertLastKey(const std::shared_ptr& lastReadKey) { - Y_ABORT_UNLESS(lastReadKey, "last key must be passed"); - - struct TSingeRowWriter : public IRowWriter { - TOwnedCellVec Row; - bool Done = false; - void AddRow(const TConstArrayRef& row) override { - Y_ABORT_UNLESS(!Done); - Row = TOwnedCellVec::Make(row); - Done = true; - } - } singleRowWriter; - NArrow::TArrowToYdbConverter converter(KeyYqlSchema, singleRowWriter); - TString errStr; - bool ok = converter.Process(*lastReadKey, errStr); - Y_ABORT_UNLESS(ok, "%s", errStr.c_str()); - - Y_ABORT_UNLESS(singleRowWriter.Done); - return singleRowWriter.Row; - } - - class TScanStatsOwner: public NKqp::TEvKqpCompute::IShardScanStats { - private: - YDB_READONLY_DEF(NOlap::TReadStats, Stats); - public: - TScanStatsOwner(const NOlap::TReadStats& stats) - : Stats(stats) { - - } - - virtual THashMap GetMetrics() const override { - THashMap result; - result["compacted_bytes"] = Stats.CompactedPortionsBytes; - result["inserted_bytes"] = Stats.InsertedPortionsBytes; - result["committed_bytes"] = Stats.CommittedPortionsBytes; - return result; - } - }; - - bool SendResult(bool pageFault, bool lastBatch) { - if (Finished) { - return true; - } - - Result->PageFault = pageFault; - Result->PageFaults = PageFaults; - Result->Finished = lastBatch; - if (ScanIterator) { - Result->AvailablePacks = ScanIterator->GetAvailableResultsCount(); - } - TDuration totalElapsedTime = TDuration::Seconds(GetElapsedTicksAsSeconds()); - // Result->TotalTime = 
totalElapsedTime - LastReportedElapsedTime; - // TODO: Result->CpuTime = ... - LastReportedElapsedTime = totalElapsedTime; - - PageFaults = 0; - - LOG_DEBUG_S(*TlsActivationContext, NKikimrServices::TX_COLUMNSHARD_SCAN, - "Scan " << ScanActorId << " send ScanData to " << ScanComputeActorId - << " txId: " << TxId << " scanId: " << ScanId << " gen: " << ScanGen << " tablet: " << TabletId - << " bytes: " << Bytes << " rows: " << Rows << " page faults: " << Result->PageFaults - << " finished: " << Result->Finished << " pageFault: " << Result->PageFault - << " arrow schema:\n" << (Result->ArrowBatch ? Result->ArrowBatch->schema()->ToString() : "")); - - Finished = Result->Finished; - if (Finished) { - ALS_INFO(NKikimrServices::TX_COLUMNSHARD_SCAN) << - "Scanner finished " << ScanActorId << " and sent to " << ScanComputeActorId - << " packs: " << PacksSum << " txId: " << TxId << " scanId: " << ScanId << " gen: " << ScanGen << " tablet: " << TabletId - << " bytes: " << Bytes << "/" << BytesSum << " rows: " << Rows << "/" << RowsSum << " page faults: " << Result->PageFaults - << " finished: " << Result->Finished << " pageFault: " << Result->PageFault - << " stats:" << Stats->ToJson() << ";iterator:" << (ScanIterator ? ScanIterator->DebugString(false) : "NO"); - Result->StatsOnFinished = std::make_shared(ScanIterator->GetStats()); - } else { - Y_ABORT_UNLESS(ChunksLimiter.Take(Bytes)); - Result->RequestedBytesLimitReached = !ChunksLimiter.HasMore(); - Y_ABORT_UNLESS(AckReceivedInstant); - ScanCountersPool.AckWaitingInfo(TMonotonic::Now() - *AckReceivedInstant); - } - AckReceivedInstant.reset(); - - Send(ScanComputeActorId, Result.Release(), IEventHandle::FlagTrackDelivery); // TODO: FlagSubscribeOnSession ? 
- - ReportStats(); - - return true; - } - - void SendScanError(TString reason = {}) { - TString msg = TStringBuilder() << "Scan failed at tablet " << TabletId; - if (!reason.empty()) { - msg += ", reason: " + reason; - } - - auto ev = MakeHolder(ScanGen, TabletId); - ev->Record.SetStatus(Ydb::StatusIds::GENERIC_ERROR); - auto issue = NYql::YqlIssue({}, NYql::TIssuesIds::KIKIMR_RESULT_UNAVAILABLE, msg); - NYql::IssueToMessage(issue, ev->Record.MutableIssues()->Add()); - - Send(ScanComputeActorId, ev.Release()); - } - - void SendAbortExecution(TString reason = {}) { - auto status = NYql::NDqProto::StatusIds::PRECONDITION_FAILED; - TString msg = TStringBuilder() << "Scan failed at tablet " << TabletId; - if (!reason.empty()) { - msg += ", reason: " + reason; - } - - Send(ScanComputeActorId, new TEvKqp::TEvAbortExecution(status, msg)); - } - - void Finish() { - LOG_DEBUG_S(*TlsActivationContext, NKikimrServices::TX_COLUMNSHARD_SCAN, - "Scan " << ScanActorId << " finished for tablet " << TabletId); - - Send(ColumnShardActorId, new TEvPrivate::TEvReadFinished(RequestCookie, TxId)); - ReportStats(); - PassAway(); - } - - void ReportStats() { - Send(ColumnShardActorId, new TEvPrivate::TEvScanStats(Rows, Bytes)); - Rows = 0; - Bytes = 0; - } - - class TInFlightGuard: NNonCopyable::TNonCopyable { - private: - static inline TAtomicCounter InFlightGlobal = 0; - i64 InFlightGuarded = 0; - public: - ~TInFlightGuard() { - Return(InFlightGuarded); - } - - bool CanTake() { - return InFlightGlobal.Val() < DEFAULT_READ_AHEAD_BYTES || !InFlightGuarded; - } - - void Take(const ui64 bytes) { - InFlightGlobal.Add(bytes); - InFlightGuarded += bytes; - } - - void Return(const ui64 bytes) { - Y_ABORT_UNLESS(InFlightGlobal.Sub(bytes) >= 0); - InFlightGuarded -= bytes; - Y_ABORT_UNLESS(InFlightGuarded >= 0); - } - }; - -private: - const TActorId ColumnShardActorId; - const TActorId ReadBlobsActorId; - const TActorId ScanComputeActorId; - std::optional AckReceivedInstant; - TActorId 
ScanActorId; - TActorId BlobCacheActorId; - const ui32 ScanId; - const ui64 TxId; - const ui32 ScanGen; - const ui64 RequestCookie; - const NKikimrDataEvents::EDataFormat DataFormat; - const ui64 TabletId; - - std::vector ReadMetadataRanges; - ui32 ReadMetadataIndex; - std::unique_ptr ScanIterator; - - std::vector> KeyYqlSchema; - const TSerializedTableRange TableRange; - const TSmallVec SkipNullKeys; - const TInstant Deadline; - TConcreteScanCounters ScanCountersPool; - - TMaybe AbortReason; - - TChunksLimiter ChunksLimiter; - THolder Result; - std::shared_ptr CurrentLastReadKey; - i64 InFlightReads = 0; - bool Finished = false; - - class TBlobStats { - private: - ui64 PartsCount = 0; - ui64 Bytes = 0; - TDuration ReadingDurationSum; - TDuration ReadingDurationMax; - NMonitoring::THistogramPtr BlobDurationsCounter; - NMonitoring::THistogramPtr ByteDurationsCounter; - public: - TBlobStats(const NMonitoring::THistogramPtr blobDurationsCounter, const NMonitoring::THistogramPtr byteDurationsCounter) - : BlobDurationsCounter(blobDurationsCounter) - , ByteDurationsCounter(byteDurationsCounter) - { - - } - void Received(const NBlobCache::TBlobRange& br, const TDuration d) { - ReadingDurationSum += d; - ReadingDurationMax = Max(ReadingDurationMax, d); - ++PartsCount; - Bytes += br.Size; - BlobDurationsCounter->Collect(d.MilliSeconds()); - ByteDurationsCounter->Collect((i64)d.MilliSeconds(), br.Size); - } - TString DebugString() const { - TStringBuilder sb; - if (PartsCount) { - sb << "p_count=" << PartsCount << ";"; - sb << "bytes=" << Bytes << ";"; - sb << "d_avg=" << ReadingDurationSum / PartsCount << ";"; - sb << "d_max=" << ReadingDurationMax << ";"; - } else { - sb << "NO_BLOBS;"; - } - return sb; - } - }; - - NTracing::TTraceClientGuard Stats; - const NOlap::TComputeShardingPolicy ComputeShardingPolicy; - ui64 Rows = 0; - ui64 BytesSum = 0; - ui64 RowsSum = 0; - ui64 PacksSum = 0; - ui64 Bytes = 0; - ui32 PageFaults = 0; - TDuration LastReportedElapsedTime; -}; - 
-static bool FillPredicatesFromRange(NOlap::TReadDescription& read, const ::NKikimrTx::TKeyRange& keyRange, - const std::vector>& ydbPk, ui64 tabletId, const NOlap::TIndexInfo* indexInfo, TString& error) { - TSerializedTableRange range(keyRange); - auto fromPredicate = std::make_shared(); - auto toPredicate = std::make_shared(); - std::tie(*fromPredicate, *toPredicate) = RangePredicates(range, ydbPk); - - LOG_S_DEBUG("TTxScan range predicate. From key size: " << range.From.GetCells().size() - << " To key size: " << range.To.GetCells().size() - << " greater predicate over columns: " << fromPredicate->ToString() - << " less predicate over columns: " << toPredicate->ToString() - << " at tablet " << tabletId); - - if (!read.PKRangesFilter.Add(fromPredicate, toPredicate, indexInfo)) { - error = "Error building filter"; - return false; - } - return true; -} - -std::shared_ptr -PrepareStatsReadMetadata(ui64 tabletId, const NOlap::TReadDescription& read, const std::unique_ptr& index, TString& error, const bool isReverse) { - THashSet readColumnIds(read.ColumnIds.begin(), read.ColumnIds.end()); - for (auto& [id, name] : read.GetProgram().GetSourceColumns()) { - readColumnIds.insert(id); - } - - for (ui32 colId : readColumnIds) { - if (!PrimaryIndexStatsSchema.Columns.contains(colId)) { - error = Sprintf("Columnd id %" PRIu32 " not found", colId); - return {}; - } - } - - auto out = std::make_shared(tabletId, - isReverse ? NOlap::TReadStatsMetadata::ESorting::DESC : NOlap::TReadStatsMetadata::ESorting::ASC, - read.GetProgram(), index ? 
index->GetVersionedIndex().GetSchema(read.GetSnapshot()) : nullptr, read.GetSnapshot()); - - out->SetPKRangesFilter(read.PKRangesFilter); - out->ReadColumnIds.assign(readColumnIds.begin(), readColumnIds.end()); - out->ResultColumnIds = read.ColumnIds; - - const NOlap::TColumnEngineForLogs* logsIndex = dynamic_cast(index.get()); - if (!index || !logsIndex) { - return out; - } - THashMap> portionsInUse; - const auto predStatSchema = [](const std::shared_ptr& l, const std::shared_ptr& r) { - return std::tuple(l->GetPathId(), l->GetPortionId()) < std::tuple(r->GetPathId(), r->GetPortionId()); - }; - for (auto&& filter : read.PKRangesFilter) { - const ui64 fromPathId = *filter.GetPredicateFrom().Get(0, 0, 1); - const ui64 toPathId = *filter.GetPredicateTo().Get(0, 0, Max()); - if (read.TableName.EndsWith(NOlap::TIndexInfo::TABLE_INDEX_STATS_TABLE)) { - if (fromPathId <= read.PathId && toPathId >= read.PathId) { - auto pathInfo = logsIndex->GetGranuleOptional(read.PathId); - if (!pathInfo) { - continue; - } - for (auto&& p : pathInfo->GetPortions()) { - if (portionsInUse[read.PathId].emplace(p.first).second) { - out->IndexPortions.emplace_back(p.second); - } - } - } - std::sort(out->IndexPortions.begin(), out->IndexPortions.end(), predStatSchema); - } else if (read.TableName.EndsWith(NOlap::TIndexInfo::STORE_INDEX_STATS_TABLE)) { - auto pathInfos = logsIndex->GetTables(fromPathId, toPathId); - for (auto&& pathInfo: pathInfos) { - for (auto&& p: pathInfo->GetPortions()) { - if (portionsInUse[p.second->GetPathId()].emplace(p.first).second) { - out->IndexPortions.emplace_back(p.second); - } - } - } - std::sort(out->IndexPortions.begin(), out->IndexPortions.end(), predStatSchema); - } - } - - return out; -} - -std::shared_ptr TTxScan::CreateReadMetadata(NOlap::TReadDescription& read, - bool indexStats, bool isReverse, ui64 itemsLimit) -{ - std::shared_ptr metadata; - if (indexStats) { - metadata = PrepareStatsReadMetadata(Self->TabletID(), read, 
Self->TablesManager.GetPrimaryIndex(), ErrorDescription, isReverse); - } else { - metadata = PrepareReadMetadata(read, Self->InsertTable, Self->TablesManager.GetPrimaryIndex(), - ErrorDescription, isReverse); - } - - if (!metadata) { - return nullptr; - } - - if (itemsLimit) { - metadata->Limit = itemsLimit; - } - - return metadata; -} - - -bool TTxScan::Execute(TTransactionContext& txc, const TActorContext& /*ctx*/) { - Y_UNUSED(txc); - - auto& record = Ev->Get()->Record; - const auto& snapshot = record.GetSnapshot(); - const auto scanId = record.GetScanId(); - const ui64 txId = record.GetTxId(); - - LOG_S_DEBUG("TTxScan prepare txId: " << txId << " scanId: " << scanId << " at tablet " << Self->TabletID()); - - ui64 itemsLimit = record.HasItemsLimit() ? record.GetItemsLimit() : 0; - - NOlap::TReadDescription read(NOlap::TSnapshot(snapshot.GetStep(), snapshot.GetTxId()), record.GetReverse()); - read.PathId = record.GetLocalPathId(); - read.ReadNothing = !(Self->TablesManager.HasTable(read.PathId)); - read.TableName = record.GetTablePath(); - bool isIndexStats = read.TableName.EndsWith(NOlap::TIndexInfo::STORE_INDEX_STATS_TABLE) || - read.TableName.EndsWith(NOlap::TIndexInfo::TABLE_INDEX_STATS_TABLE); - read.ColumnIds.assign(record.GetColumnTags().begin(), record.GetColumnTags().end()); - read.StatsMode = record.GetStatsMode(); - - const NOlap::TIndexInfo* indexInfo = nullptr; - if (!isIndexStats) { - indexInfo = &(Self->TablesManager.GetIndexInfo(NOlap::TSnapshot(snapshot.GetStep(), snapshot.GetTxId()))); - } - - bool parseResult; - - if (!isIndexStats) { - TIndexColumnResolver columnResolver(*indexInfo); - parseResult = ParseProgram(record.GetOlapProgramType(), record.GetOlapProgram(), read, columnResolver); - } else { - TStatsColumnResolver columnResolver; - parseResult = ParseProgram(record.GetOlapProgramType(), record.GetOlapProgram(), read, columnResolver); - } - - if (!parseResult) { - return true; - } - - if (!record.RangesSize()) { - auto range = 
CreateReadMetadata(read, isIndexStats, record.GetReverse(), itemsLimit); - if (range) { - ReadMetadataRanges = {range}; - } - return true; - } - - ReadMetadataRanges.reserve(record.RangesSize()); - - auto ydbKey = isIndexStats ? - NOlap::GetColumns(PrimaryIndexStatsSchema, PrimaryIndexStatsSchema.KeyColumns) : - indexInfo->GetPrimaryKeyColumns(); - - for (auto& range: record.GetRanges()) { - if (!FillPredicatesFromRange(read, range, ydbKey, Self->TabletID(), isIndexStats ? nullptr : indexInfo, ErrorDescription)) { - ReadMetadataRanges.clear(); - return true; - } - } - { - auto newRange = CreateReadMetadata(read, isIndexStats, record.GetReverse(), itemsLimit); - if (!newRange) { - ReadMetadataRanges.clear(); - return true; - } - ReadMetadataRanges.emplace_back(newRange); - } - Y_ABORT_UNLESS(ReadMetadataRanges.size() == 1); - - return true; -} - -template -struct TContainerPrinter { - const T& Ref; - - TContainerPrinter(const T& ref) - : Ref(ref) - {} - - friend IOutputStream& operator << (IOutputStream& out, const TContainerPrinter& cont) { - for (auto& ptr : cont.Ref) { - out << *ptr << " "; - } - return out; - } -}; - -void TTxScan::Complete(const TActorContext& ctx) { - auto& request = Ev->Get()->Record; - auto scanComputeActor = Ev->Sender; - const auto& snapshot = request.GetSnapshot(); - const auto scanId = request.GetScanId(); - const ui64 txId = request.GetTxId(); - const ui32 scanGen = request.GetGeneration(); - TString table = request.GetTablePath(); - auto dataFormat = request.GetDataFormat(); - const TDuration timeout = TDuration::MilliSeconds(request.GetTimeoutMs()); - if (scanGen > 1) { - Self->IncCounter(COUNTER_SCAN_RESTARTED); - } - - TStringStream detailedInfo; - if (IS_LOG_PRIORITY_ENABLED(NActors::NLog::PRI_TRACE, NKikimrServices::TX_COLUMNSHARD)) { - detailedInfo << " read metadata: (" << TContainerPrinter(ReadMetadataRanges) << ")" << " req: " << request; - } - if (ReadMetadataRanges.empty()) { - LOG_S_DEBUG("TTxScan failed " - << " txId: " << 
txId - << " scanId: " << scanId - << " gen: " << scanGen - << " table: " << table - << " snapshot: " << snapshot - << " timeout: " << timeout - << detailedInfo.Str() - << " at tablet " << Self->TabletID()); - - auto ev = MakeHolder(scanGen, Self->TabletID()); - - ev->Record.SetStatus(Ydb::StatusIds::BAD_REQUEST); - auto issue = NYql::YqlIssue({}, NYql::TIssuesIds::KIKIMR_BAD_REQUEST, TStringBuilder() - << "Table " << table << " (shard " << Self->TabletID() << ") scan failed, reason: " << ErrorDescription ? ErrorDescription : "unknown error"); - NYql::IssueToMessage(issue, ev->Record.MutableIssues()->Add()); - - ctx.Send(scanComputeActor, ev.Release()); - return; - } - - ui64 requestCookie = Self->InFlightReadsTracker.AddInFlightRequest(ReadMetadataRanges); - auto statsDelta = Self->InFlightReadsTracker.GetSelectStatsDelta(); - - Self->IncCounter(COUNTER_READ_INDEX_PORTIONS, statsDelta.Portions); - Self->IncCounter(COUNTER_READ_INDEX_BLOBS, statsDelta.Blobs); - Self->IncCounter(COUNTER_READ_INDEX_ROWS, statsDelta.Rows); - Self->IncCounter(COUNTER_READ_INDEX_BYTES, statsDelta.Bytes); - - NOlap::TComputeShardingPolicy shardingPolicy; - AFL_VERIFY(shardingPolicy.DeserializeFromProto(request.GetComputeShardingPolicy())); - - auto scanActor = ctx.Register(new TColumnShardScan(Self->SelfId(), scanComputeActor, Self->GetStoragesManager(), - shardingPolicy, scanId, txId, scanGen, requestCookie, Self->TabletID(), timeout, std::move(ReadMetadataRanges), dataFormat, Self->ScanCounters)); - - LOG_S_DEBUG("TTxScan starting " << scanActor - << " txId: " << txId - << " scanId: " << scanId - << " gen: " << scanGen - << " table: " << table - << " snapshot: " << snapshot - << " timeout: " << timeout - << detailedInfo.Str() - << " at tablet " << Self->TabletID()); -} - - void TColumnShard::Handle(TEvColumnShard::TEvScan::TPtr& ev, const TActorContext& ctx) { auto& record = ev->Get()->Record; ui64 txId = record.GetTxId(); @@ -896,61 +29,7 @@ void 
TColumnShard::Handle(TEvColumnShard::TEvScan::TPtr& ev, const TActorContext LastAccessTime = TAppData::TimeProvider->Now(); ScanTxInFlight.insert({txId, LastAccessTime}); SetCounter(COUNTER_SCAN_IN_FLY, ScanTxInFlight.size()); - Execute(new TTxScan(this, ev), ctx); -} - -const NKikimr::NOlap::TReadStats& TScanIteratorBase::GetStats() const { - return Default(); -} - -} - -namespace NKikimr::NOlap { - -class TCurrentBatch { -private: - std::vector Results; - ui64 RecordsCount = 0; -public: - ui64 GetRecordsCount() const { - return RecordsCount; - } - - void AddChunk(TPartialReadResult&& res) { - RecordsCount += res.GetRecordsCount(); - Results.emplace_back(std::move(res)); - } - - void FillResult(std::vector& result) const { - if (Results.empty()) { - return; - } - for (auto&& i : Results) { - result.emplace_back(std::move(i)); - } - } -}; - -std::vector TPartialReadResult::SplitResults(std::vector&& resultsExt, const ui32 maxRecordsInResult) { - std::vector resultBatches; - TCurrentBatch currentBatch; - for (auto&& i : resultsExt) { - AFL_VERIFY(i.GetRecordsCount()); - currentBatch.AddChunk(std::move(i)); - if (currentBatch.GetRecordsCount() >= maxRecordsInResult) { - resultBatches.emplace_back(std::move(currentBatch)); - currentBatch = TCurrentBatch(); - } - } - if (currentBatch.GetRecordsCount()) { - resultBatches.emplace_back(std::move(currentBatch)); - } - - std::vector result; - for (auto&& i : resultBatches) { - i.FillResult(result); - } - return result; + Execute(new NOlap::NReader::TTxScan(this, ev), ctx); } } diff --git a/ydb/core/tx/columnshard/columnshard__scan.h b/ydb/core/tx/columnshard/columnshard__scan.h index 2c6dd9c52a57..6f70f09beec2 100644 --- a/ydb/core/tx/columnshard/columnshard__scan.h +++ b/ydb/core/tx/columnshard/columnshard__scan.h @@ -1,116 +1 @@ #pragma once - -#include "blob_cache.h" -#include "blobs_reader/task.h" -#include "engines/reader/conveyor_task.h" -#include "resources/memory.h" -#include -#include -#include - -namespace 
NKikimr::NOlap { -struct TReadStats; -// Represents a batch of rows produced by ASC or DESC scan with applied filters and partial aggregation -class TPartialReadResult { -private: - YDB_READONLY_DEF(std::vector>, ResourcesGuards); - NArrow::TShardedRecordBatch ResultBatch; - - // This 1-row batch contains the last key that was read while producing the ResultBatch. - // NOTE: it might be different from the Key of last row in ResulBatch in case of filtering/aggregation/limit - std::shared_ptr LastReadKey; - -public: - void Cut(const ui32 limit) { - ResultBatch.Cut(limit); - } - - const arrow::RecordBatch& GetResultBatch() const { - return *ResultBatch.GetRecordBatch(); - } - - const std::shared_ptr& GetResultBatchPtrVerified() const { - return ResultBatch.GetRecordBatch(); - } - - const std::shared_ptr& GetResourcesGuardOnly() const { - AFL_VERIFY(ResourcesGuards.size() == 1); - AFL_VERIFY(!!ResourcesGuards.front()); - return ResourcesGuards.front(); - } - - ui64 GetMemorySize() const { - return ResultBatch.GetMemorySize(); - } - - ui64 GetRecordsCount() const { - return ResultBatch.GetRecordsCount(); - } - - static std::vector SplitResults(std::vector&& resultsExt, const ui32 maxRecordsInResult); - - const NArrow::TShardedRecordBatch& GetShardedBatch() const { - return ResultBatch; - } - - const std::shared_ptr& GetLastReadKey() const { - return LastReadKey; - } - - std::string ErrorString; - - explicit TPartialReadResult( - const std::vector>& resourcesGuards, - const NArrow::TShardedRecordBatch& batch, std::shared_ptr lastKey) - : ResourcesGuards(resourcesGuards) - , ResultBatch(batch) - , LastReadKey(lastKey) { - for (auto&& i : ResourcesGuards) { - AFL_VERIFY(i); - } - Y_ABORT_UNLESS(ResultBatch.GetRecordsCount()); - Y_ABORT_UNLESS(LastReadKey); - Y_ABORT_UNLESS(LastReadKey->num_rows() == 1); - } - - explicit TPartialReadResult( - const std::shared_ptr& resourcesGuards, - const NArrow::TShardedRecordBatch& batch, std::shared_ptr lastKey) - : 
TPartialReadResult(std::vector>({resourcesGuards}), batch, lastKey) { - AFL_VERIFY(resourcesGuards); - } - - explicit TPartialReadResult(const NArrow::TShardedRecordBatch& batch, std::shared_ptr lastKey) - : TPartialReadResult(std::vector>(), batch, lastKey) { - } -}; -} - -namespace NKikimr::NColumnShard { - -class TScanIteratorBase { -public: - virtual ~TScanIteratorBase() = default; - - virtual void Apply(IDataTasksProcessor::ITask::TPtr /*processor*/) { - - } - - virtual const NOlap::TReadStats& GetStats() const; - - virtual std::optional GetAvailableResultsCount() const { - return {}; - } - virtual bool Finished() const = 0; - virtual std::optional GetBatch() = 0; - virtual void PrepareResults() { - - } - virtual bool ReadNextInterval() { return false; } - virtual TString DebugString(const bool verbose = false) const { - Y_UNUSED(verbose); - return "NO_DATA"; - } -}; - -} diff --git a/ydb/core/tx/columnshard/columnshard__stats_scan.cpp b/ydb/core/tx/columnshard/columnshard__stats_scan.cpp deleted file mode 100644 index c7b189983a38..000000000000 --- a/ydb/core/tx/columnshard/columnshard__stats_scan.cpp +++ /dev/null @@ -1,122 +0,0 @@ -#include "columnshard__stats_scan.h" - -namespace NKikimr::NColumnShard { - -std::optional TStatsIterator::GetBatch() { - // Take next raw batch - auto batch = FillStatsBatch(); - - // Extract the last row's PK - auto keyBatch = NArrow::ExtractColumns(batch, KeySchema); - auto lastKey = keyBatch->Slice(keyBatch->num_rows() - 1, 1); - - ApplyRangePredicates(batch); - if (!batch->num_rows()) { - return {}; - } - // Leave only requested columns - auto resultBatch = NArrow::ExtractColumns(batch, ResultSchema); - NArrow::TStatusValidator::Validate(ReadMetadata->GetProgram().ApplyProgram(resultBatch)); - if (!resultBatch->num_rows()) { - return {}; - } - NOlap::TPartialReadResult out(resultBatch, lastKey); - - return std::move(out); -} - -std::shared_ptr TStatsIterator::FillStatsBatch() { - std::vector> portions; - ui32 recordsCount = 
0; - while (IndexPortions.size()) { - auto& i = IndexPortions.front(); - recordsCount += i->Records.size(); - portions.emplace_back(i); - IndexPortions.pop_front(); - if (recordsCount > 10000) { - break; - } - } - std::vector allColumnIds; - for (const auto& c : PrimaryIndexStatsSchema.Columns) { - allColumnIds.push_back(c.second.Id); - } - std::sort(allColumnIds.begin(), allColumnIds.end()); - auto schema = NOlap::MakeArrowSchema(PrimaryIndexStatsSchema.Columns, allColumnIds); - auto builders = NArrow::MakeBuilders(schema, recordsCount); - - for (auto&& p: portions) { - AppendStats(builders, *p); - } - - auto columns = NArrow::Finish(std::move(builders)); - return arrow::RecordBatch::Make(schema, recordsCount, columns); -} - -void TStatsIterator::ApplyRangePredicates(std::shared_ptr& batch) { - NArrow::TColumnFilter filter = ReadMetadata->GetPKRangesFilter().BuildFilter(batch); - filter.Apply(batch); -} - -void TStatsIterator::AppendStats(const std::vector>& builders, const NOlap::TPortionInfo& portion) { - { - std::vector records; - for (auto&& r : portion.Records) { - records.emplace_back(&r); - } - if (Reverse) { - std::reverse(records.begin(), records.end()); - } - for (auto&& r : records) { - NArrow::Append(*builders[0], portion.GetPathId()); - const std::string prod = ::ToString(portion.GetMeta().Produced); - NArrow::Append(*builders[1], prod); - NArrow::Append(*builders[2], ReadMetadata->TabletId); - NArrow::Append(*builders[3], r->GetMeta().GetNumRowsVerified()); - NArrow::Append(*builders[4], r->GetMeta().GetRawBytesVerified()); - NArrow::Append(*builders[5], portion.GetPortionId()); - NArrow::Append(*builders[6], r->GetChunkIdx()); - NArrow::Append(*builders[7], ReadMetadata->GetColumnNameDef(r->GetColumnId()).value_or("undefined")); - NArrow::Append(*builders[8], r->GetColumnId()); - std::string blobIdString = r->BlobRange.BlobId.ToStringLegacy(); - NArrow::Append(*builders[9], blobIdString); - NArrow::Append(*builders[10], r->BlobRange.Offset); - 
NArrow::Append(*builders[11], r->BlobRange.Size); - NArrow::Append(*builders[12], !portion.HasRemoveSnapshot() || ReadMetadata->GetRequestSnapshot() < portion.GetRemoveSnapshot()); - std::string strTierName(portion.GetMeta().GetTierName().data(), portion.GetMeta().GetTierName().size()); - NArrow::Append(*builders[13], strTierName); - NArrow::Append(*builders[14], "COLUMN"); - } - } - { - std::vector indexes; - for (auto&& r : portion.GetIndexes()) { - indexes.emplace_back(&r); - } - if (Reverse) { - std::reverse(indexes.begin(), indexes.end()); - } - for (auto&& r : indexes) { - NArrow::Append(*builders[0], portion.GetPathId()); - const std::string prod = ::ToString(portion.GetMeta().Produced); - NArrow::Append(*builders[1], prod); - NArrow::Append(*builders[2], ReadMetadata->TabletId); - NArrow::Append(*builders[3], r->GetRecordsCount()); - NArrow::Append(*builders[4], r->GetRawBytes()); - NArrow::Append(*builders[5], portion.GetPortionId()); - NArrow::Append(*builders[6], r->GetChunkIdx()); - NArrow::Append(*builders[7], ReadMetadata->GetEntityName(r->GetIndexId()).value_or("undefined")); - NArrow::Append(*builders[8], r->GetIndexId()); - std::string blobIdString = r->GetBlobRange().BlobId.ToStringLegacy(); - NArrow::Append(*builders[9], blobIdString); - NArrow::Append(*builders[10], r->GetBlobRange().Offset); - NArrow::Append(*builders[11], r->GetBlobRange().Size); - NArrow::Append(*builders[12], !portion.HasRemoveSnapshot() || ReadMetadata->GetRequestSnapshot() < portion.GetRemoveSnapshot()); - std::string strTierName(portion.GetMeta().GetTierName().data(), portion.GetMeta().GetTierName().size()); - NArrow::Append(*builders[13], strTierName); - NArrow::Append(*builders[14], "INDEX"); - } - } -} - -} diff --git a/ydb/core/tx/columnshard/columnshard__stats_scan.h b/ydb/core/tx/columnshard/columnshard__stats_scan.h deleted file mode 100644 index 65fe0999ea70..000000000000 --- a/ydb/core/tx/columnshard/columnshard__stats_scan.h +++ /dev/null @@ -1,88 +0,0 @@ 
-#pragma once - -#include "columnshard__scan.h" -#include "columnshard_common.h" -#include "engines/reader/read_metadata.h" - -#include -#include -#include - -namespace NKikimr::NColumnShard { - -static const NTable::TScheme::TTableSchema PrimaryIndexStatsSchema = []() { - NTable::TScheme::TTableSchema schema; - NIceDb::NHelpers::TStaticSchemaFiller::Fill(schema); - return schema; -}(); - - -class TStatsColumnResolver : public IColumnResolver { -public: - TString GetColumnName(ui32 id, bool required) const override { - auto it = PrimaryIndexStatsSchema.Columns.find(id); - if (it == PrimaryIndexStatsSchema.Columns.end()) { - Y_ABORT_UNLESS(!required, "No column '%" PRIu32 "' in primary_index_stats", id); - return {}; - } - return it->second.Name; - } - - std::optional GetColumnIdOptional(const TString& name) const override { - auto it = PrimaryIndexStatsSchema.ColumnNames.find(name); - if (it == PrimaryIndexStatsSchema.ColumnNames.end()) { - return {}; - } else { - return it->second; - } - } - - const NTable::TScheme::TTableSchema& GetSchema() const override { - return PrimaryIndexStatsSchema; - } - - NSsa::TColumnInfo GetDefaultColumn() const override { - return NSsa::TColumnInfo::Original(1, "PathId"); - } -}; - - -class TStatsIterator : public TScanIteratorBase { -public: - TStatsIterator(const NOlap::TReadStatsMetadata::TConstPtr& readMetadata) - : ReadMetadata(readMetadata) - , Reverse(ReadMetadata->IsDescSorted()) - , KeySchema(NOlap::MakeArrowSchema(PrimaryIndexStatsSchema.Columns, PrimaryIndexStatsSchema.KeyColumns)) - , ResultSchema(NOlap::MakeArrowSchema(PrimaryIndexStatsSchema.Columns, ReadMetadata->ResultColumnIds)) - , IndexPortions(ReadMetadata->IndexPortions) - { - if (ResultSchema->num_fields() == 0) { - ResultSchema = KeySchema; - } - if (Reverse) { - std::reverse(IndexPortions.begin(), IndexPortions.end()); - } - } - - bool Finished() const override { - return IndexPortions.empty(); - } - - std::optional GetBatch() override; - -private: - 
NOlap::TReadStatsMetadata::TConstPtr ReadMetadata; - bool Reverse{false}; - std::shared_ptr KeySchema; - std::shared_ptr ResultSchema; - - std::deque> IndexPortions; - - std::shared_ptr FillStatsBatch(); - - void ApplyRangePredicates(std::shared_ptr& batch); - - void AppendStats(const std::vector>& builders, const NOlap::TPortionInfo& portion); -}; - -} diff --git a/ydb/core/tx/columnshard/columnshard__write.cpp b/ydb/core/tx/columnshard/columnshard__write.cpp index 34c55c811cda..413b415fc79d 100644 --- a/ydb/core/tx/columnshard/columnshard__write.cpp +++ b/ydb/core/tx/columnshard/columnshard__write.cpp @@ -12,7 +12,7 @@ namespace NKikimr::NColumnShard { using namespace NTabletFlatExecutor; -void TColumnShard::OverloadWriteFail(const EOverloadStatus overloadReason, const NEvWrite::TWriteData& writeData, std::unique_ptr&& event, const TActorContext& ctx) { +void TColumnShard::OverloadWriteFail(const EOverloadStatus overloadReason, const NEvWrite::TWriteData& writeData, const ui64 cookie, std::unique_ptr&& event, const TActorContext& ctx) { IncCounter(COUNTER_WRITE_FAIL); switch (overloadReason) { case EOverloadStatus::Disk: @@ -22,6 +22,10 @@ void TColumnShard::OverloadWriteFail(const EOverloadStatus overloadReason, const IncCounter(COUNTER_WRITE_OVERLOAD); CSCounters.OnOverloadInsertTable(writeData.GetSize()); break; + case EOverloadStatus::OverloadMetadata: + IncCounter(COUNTER_WRITE_OVERLOAD); + CSCounters.OnOverloadMetadata(writeData.GetSize()); + break; case EOverloadStatus::ShardTxInFly: IncCounter(COUNTER_WRITE_OVERLOAD); CSCounters.OnOverloadShardTx(writeData.GetSize()); @@ -42,7 +46,7 @@ void TColumnShard::OverloadWriteFail(const EOverloadStatus overloadReason, const << " overload reason: [" << overloadReason << "]" << " at tablet " << TabletID()); - ctx.Send(writeData.GetWriteMeta().GetSource(), event.release()); + ctx.Send(writeData.GetWriteMeta().GetSource(), event.release(), 0, cookie); } TColumnShard::EOverloadStatus TColumnShard::CheckOverloaded(const 
ui64 tableId) const { @@ -54,6 +58,11 @@ TColumnShard::EOverloadStatus TColumnShard::CheckOverloaded(const ui64 tableId) return EOverloadStatus::InsertTable; } + CSCounters.OnIndexMetadataLimit(NOlap::IColumnEngine::GetMetadataLimit()); + if (TablesManager.GetPrimaryIndex() && TablesManager.GetPrimaryIndex()->IsOverloadedByMetadata(NOlap::IColumnEngine::GetMetadataLimit())) { + return EOverloadStatus::OverloadMetadata; + } + ui64 txLimit = Settings.OverloadTxInFlight; ui64 writesLimit = Settings.OverloadWritesInFlight; ui64 writesSizeLimit = Settings.OverloadWritesSizeInFlight; @@ -116,8 +125,8 @@ void TColumnShard::Handle(TEvPrivate::TEvWriteBlobsResult::TPtr& ev, const TActo } else { auto operation = OperationsManager->GetOperation((TWriteId)writeMeta.GetWriteId()); Y_ABORT_UNLESS(operation); - auto result = NEvents::TDataEvents::TEvWriteResult::BuildError(TabletID(), operation->GetTxId(), NKikimrDataEvents::TEvWriteResult::STATUS_INTERNAL_ERROR, "put data fails"); - ctx.Send(writeMeta.GetSource(), result.release()); + auto result = NEvents::TDataEvents::TEvWriteResult::BuildError(TabletID(), operation->GetLockId(), NKikimrDataEvents::TEvWriteResult::STATUS_INTERNAL_ERROR, "put data fails"); + ctx.Send(writeMeta.GetSource(), result.release(), 0, operation->GetCookie()); } CSCounters.OnFailedWriteResponse(EWriteFailReason::PutBlob); wBuffer.RemoveData(aggr, StoragesManager->GetInsertOperator()); @@ -149,6 +158,7 @@ void TColumnShard::Handle(TEvColumnShard::TEvWrite::TPtr& ev, const TActorContex const auto& record = Proto(ev->Get()); const ui64 tableId = record.GetTableId(); const ui64 writeId = record.GetWriteId(); + const ui64 cookie = ev->Cookie; const TString dedupId = record.GetDedupId(); const auto source = ev->Sender; @@ -188,11 +198,12 @@ void TColumnShard::Handle(TEvColumnShard::TEvWrite::TPtr& ev, const TActorContex return returnFail(COUNTER_WRITE_FAIL); } - NEvWrite::TWriteData writeData(writeMeta, arrowData, 
snapshotSchema->GetIndexInfo().GetReplaceKey(), StoragesManager->GetInsertOperator()->StartWritingAction("WRITING")); + NEvWrite::TWriteData writeData(writeMeta, arrowData, snapshotSchema->GetIndexInfo().GetReplaceKey(), + StoragesManager->GetInsertOperator()->StartWritingAction(NOlap::NBlobOperations::EConsumer::WRITING)); auto overloadStatus = CheckOverloaded(tableId); if (overloadStatus != EOverloadStatus::None) { std::unique_ptr result = std::make_unique(TabletID(), writeData.GetWriteMeta(), NKikimrTxColumnShard::EResultStatus::OVERLOADED); - OverloadWriteFail(overloadStatus, writeData, std::move(result), ctx); + OverloadWriteFail(overloadStatus, writeData, cookie, std::move(result), ctx); CSCounters.OnFailedWriteResponse(EWriteFailReason::Overload); } else { if (ui64 writeId = (ui64)HasLongTxWrite(writeMeta.GetLongTxIdUnsafe(), writeMeta.GetWritePartId())) { @@ -221,41 +232,124 @@ void TColumnShard::Handle(TEvColumnShard::TEvWrite::TPtr& ev, const TActorContex } } +class TCommitOperation { +public: + using TPtr = std::shared_ptr; + + bool Parse(const NEvents::TDataEvents::TEvWrite& evWrite) { + LockId = evWrite.Record.GetLockTxId(); + TxId = evWrite.Record.GetTxId(); + KqpLocks = evWrite.Record.GetLocks(); + return !!LockId && !!TxId && KqpLocks.GetOp() == NKikimrDataEvents::TKqpLocks::Commit; + } + +private: + NKikimrDataEvents::TKqpLocks KqpLocks; + YDB_READONLY(ui64, LockId, 0); + YDB_READONLY(ui64, TxId, 0); +}; +class TProposeWriteTransaction : public TProposeTransactionBase { +public: + TProposeWriteTransaction(TColumnShard* self, TCommitOperation::TPtr op, const TActorId source, const ui64 cookie) + : TProposeTransactionBase(self) + , WriteCommit(op) + , Source(source) + , Cookie(cookie) + {} + + bool Execute(TTransactionContext& txc, const TActorContext& ctx) override; + void Complete(const TActorContext& ctx) override; + TTxType GetTxType() const override { return TXTYPE_PROPOSE; } + +private: + void OnProposeResult(TTxController::TProposeResult& 
proposeResult, const TTxController::TTxInfo& txInfo) override; + void OnProposeError(TTxController::TProposeResult& proposeResult, const TTxController::TBasicTxInfo& txInfo) override; + +private: + TCommitOperation::TPtr WriteCommit; + TActorId Source; + ui64 Cookie; + std::unique_ptr Result; +}; + +bool TProposeWriteTransaction::Execute(TTransactionContext& txc, const TActorContext&) { + NKikimrTxColumnShard::TCommitWriteTxBody proto; + proto.SetLockId(WriteCommit->GetLockId()); + TString txBody; + Y_ABORT_UNLESS(proto.SerializeToString(&txBody)); + ProposeTransaction(TTxController::TBasicTxInfo(NKikimrTxColumnShard::TX_KIND_COMMIT_WRITE, WriteCommit->GetTxId()), txBody, Source, Cookie, txc); + return true; +} + +void TProposeWriteTransaction::Complete(const TActorContext& ctx) { + ctx.Send(Source, Result.release(), 0, Cookie); +} + +void TProposeWriteTransaction::OnProposeResult(TTxController::TProposeResult& proposeResult, const TTxController::TTxInfo& txInfo) { + Y_UNUSED(proposeResult); + Result = NEvents::TDataEvents::TEvWriteResult::BuildPrepared(Self->TabletID(), txInfo.TxId, Self->GetProgressTxController().BuildCoordinatorInfo(txInfo)); +} + +void TProposeWriteTransaction::OnProposeError(TTxController::TProposeResult& proposeResult, const TTxController::TBasicTxInfo& txInfo) { + Y_UNUSED(proposeResult); + Result = NEvents::TDataEvents::TEvWriteResult::BuildError(Self->TabletID(), txInfo.TxId, NKikimrDataEvents::TEvWriteResult::STATUS_INTERNAL_ERROR, proposeResult.GetStatusMessage()); +} + void TColumnShard::Handle(NEvents::TDataEvents::TEvWrite::TPtr& ev, const TActorContext& ctx) { NActors::TLogContextGuard gLogging = NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("tablet_id", TabletID())("event", "TEvWrite"); const auto& record = ev->Get()->Record; - const ui64 txId = ev->Get()->GetTxId(); const auto source = ev->Sender; + const auto cookie = ev->Cookie; + const auto behaviour = TOperationsManager::GetBehaviour(*ev->Get()); + + if 
(behaviour == EOperationBehaviour::Undefined) { + IncCounter(COUNTER_WRITE_FAIL); + auto result = NEvents::TDataEvents::TEvWriteResult::BuildError(TabletID(), 0, NKikimrDataEvents::TEvWriteResult::STATUS_BAD_REQUEST, "invalid write event"); + ctx.Send(source, result.release(), 0, cookie); + return; + } + + if (behaviour == EOperationBehaviour::CommitWriteLock) { + auto commitOperation = std::make_shared(); + if (!commitOperation->Parse(*ev->Get())) { + IncCounter(COUNTER_WRITE_FAIL); + auto result = NEvents::TDataEvents::TEvWriteResult::BuildError(TabletID(), 0, NKikimrDataEvents::TEvWriteResult::STATUS_BAD_REQUEST, "invalid commit event"); + ctx.Send(source, result.release(), 0, cookie); + } + Execute(new TProposeWriteTransaction(this, commitOperation, source, cookie), ctx); + return; + } + + const ui64 lockId = (behaviour == EOperationBehaviour::InTxWrite) ? record.GetTxId() : record.GetLockTxId(); if (record.GetOperations().size() != 1) { IncCounter(COUNTER_WRITE_FAIL); - auto result = NEvents::TDataEvents::TEvWriteResult::BuildError(TabletID(), txId, NKikimrDataEvents::TEvWriteResult::STATUS_BAD_REQUEST, "only single operation is supported"); - ctx.Send(source, result.release()); + auto result = NEvents::TDataEvents::TEvWriteResult::BuildError(TabletID(), 0, NKikimrDataEvents::TEvWriteResult::STATUS_BAD_REQUEST, "only single operation is supported"); + ctx.Send(source, result.release(), 0, cookie); return; } const auto& operation = record.GetOperations()[0]; - if (operation.GetType() != NKikimrDataEvents::TEvWrite::TOperation::OPERATION_REPLACE) { IncCounter(COUNTER_WRITE_FAIL); - auto result = NEvents::TDataEvents::TEvWriteResult::BuildError(TabletID(), txId, NKikimrDataEvents::TEvWriteResult::STATUS_BAD_REQUEST, "only REPLACE operation is supported"); - ctx.Send(source, result.release()); + auto result = NEvents::TDataEvents::TEvWriteResult::BuildError(TabletID(), 0, NKikimrDataEvents::TEvWriteResult::STATUS_BAD_REQUEST, "only REPLACE operation is 
supported"); + ctx.Send(source, result.release(), 0, cookie); return; } if (!operation.GetTableId().HasSchemaVersion()) { IncCounter(COUNTER_WRITE_FAIL); - auto result = NEvents::TDataEvents::TEvWriteResult::BuildError(TabletID(), txId, NKikimrDataEvents::TEvWriteResult::STATUS_BAD_REQUEST, "schema version not set"); - ctx.Send(source, result.release()); + auto result = NEvents::TDataEvents::TEvWriteResult::BuildError(TabletID(), 0, NKikimrDataEvents::TEvWriteResult::STATUS_BAD_REQUEST, "schema version not set"); + ctx.Send(source, result.release(), 0, cookie); return; } auto schema = TablesManager.GetPrimaryIndex()->GetVersionedIndex().GetSchema(operation.GetTableId().GetSchemaVersion()); if (!schema) { IncCounter(COUNTER_WRITE_FAIL); - auto result = NEvents::TDataEvents::TEvWriteResult::BuildError(TabletID(), txId, NKikimrDataEvents::TEvWriteResult::STATUS_BAD_REQUEST, "unknown schema version"); - ctx.Send(source, result.release()); + auto result = NEvents::TDataEvents::TEvWriteResult::BuildError(TabletID(), 0, NKikimrDataEvents::TEvWriteResult::STATUS_BAD_REQUEST, "unknown schema version"); + ctx.Send(source, result.release(), 0, cookie); return; } @@ -263,30 +357,31 @@ void TColumnShard::Handle(NEvents::TDataEvents::TEvWrite::TPtr& ev, const TActor if (!TablesManager.IsReadyForWrite(tableId)) { IncCounter(COUNTER_WRITE_FAIL); - auto result = NEvents::TDataEvents::TEvWriteResult::BuildError(TabletID(), txId, NKikimrDataEvents::TEvWriteResult::STATUS_INTERNAL_ERROR, "table not writable"); - ctx.Send(source, result.release()); + auto result = NEvents::TDataEvents::TEvWriteResult::BuildError(TabletID(), 0, NKikimrDataEvents::TEvWriteResult::STATUS_INTERNAL_ERROR, "table not writable"); + ctx.Send(source, result.release(), 0, cookie); return; } auto arrowData = std::make_shared(schema); - if (!arrowData->Parse(operation, NEvWrite::TPayloadHelper(*ev->Get()))) { + if (!arrowData->Parse(operation, NEvWrite::TPayloadReader(*ev->Get()))) { 
IncCounter(COUNTER_WRITE_FAIL); - auto result = NEvents::TDataEvents::TEvWriteResult::BuildError(TabletID(), txId, NKikimrDataEvents::TEvWriteResult::STATUS_BAD_REQUEST, "parsing data error"); - ctx.Send(source, result.release()); + auto result = NEvents::TDataEvents::TEvWriteResult::BuildError(TabletID(), 0, NKikimrDataEvents::TEvWriteResult::STATUS_BAD_REQUEST, "parsing data error"); + ctx.Send(source, result.release(), 0, cookie); } auto overloadStatus = CheckOverloaded(tableId); if (overloadStatus != EOverloadStatus::None) { NEvWrite::TWriteData writeData(NEvWrite::TWriteMeta(0, tableId, source), arrowData, nullptr, nullptr); - std::unique_ptr result = NEvents::TDataEvents::TEvWriteResult::BuildError(TabletID(), txId, NKikimrDataEvents::TEvWriteResult::STATUS_OVERLOADED, "overload data error"); - OverloadWriteFail(overloadStatus, writeData, std::move(result), ctx); + std::unique_ptr result = NEvents::TDataEvents::TEvWriteResult::BuildError(TabletID(), 0, NKikimrDataEvents::TEvWriteResult::STATUS_OVERLOADED, "overload data error"); + OverloadWriteFail(overloadStatus, writeData, cookie, std::move(result), ctx); return; } auto wg = WritesMonitor.RegisterWrite(arrowData->GetSize()); - auto writeOperation = OperationsManager->RegisterOperation(txId); + auto writeOperation = OperationsManager->RegisterOperation(lockId, cookie); Y_ABORT_UNLESS(writeOperation); + writeOperation->SetBehaviour(behaviour); writeOperation->Start(*this, tableId, arrowData, source, ctx); } diff --git a/ydb/core/tx/columnshard/columnshard__write_index.cpp b/ydb/core/tx/columnshard/columnshard__write_index.cpp index fe423b8156d0..b98537ddbea7 100644 --- a/ydb/core/tx/columnshard/columnshard__write_index.cpp +++ b/ydb/core/tx/columnshard/columnshard__write_index.cpp @@ -26,9 +26,10 @@ void TColumnShard::Handle(TEvPrivate::TEvWriteIndex::TPtr& ev, const TActorConte ACFL_DEBUG("event", "TEvWriteIndex")("count", ev->Get()->IndexChanges->GetWritePortionsCount()); 
AFL_VERIFY(ev->Get()->IndexChanges->GetWritePortionsCount()); - const bool needDraftTransaction = ev->Get()->IndexChanges->GetBlobsAction().NeedDraftWritingTransaction(); auto writeController = std::make_shared(ctx.SelfID, ev->Release()); - if (needDraftTransaction) { + const TConclusion needDraftTransaction = writeController->GetBlobsAction().NeedDraftWritingTransaction(); + AFL_VERIFY(needDraftTransaction.IsSuccess())("error", needDraftTransaction.GetErrorMessage()); + if (*needDraftTransaction) { Execute(new TTxWriteDraft(this, writeController)); } else { ctx.Register(CreateWriteActor(TabletID(), writeController, TInstant::Max())); diff --git a/ydb/core/tx/columnshard/columnshard_common.cpp b/ydb/core/tx/columnshard/columnshard_common.cpp index dfd825f3b23d..d9f0dcc5e79b 100644 --- a/ydb/core/tx/columnshard/columnshard_common.cpp +++ b/ydb/core/tx/columnshard/columnshard_common.cpp @@ -10,91 +10,9 @@ using EAggregate = NArrow::EAggregate; using TAssign = NSsa::TAssign; using TAggregateAssign = NSsa::TAggregateAssign; -std::vector ExtractTypes(const std::vector>& columns) { - std::vector types; - types.reserve(columns.size()); - for (auto& [name, type] : columns) { - types.push_back(type); - } - return types; -} - -TString FromCells(const TConstArrayRef& cells, const std::vector>& columns) { - Y_ABORT_UNLESS(cells.size() == columns.size()); - if (cells.empty()) { - return {}; - } - - std::vector types = ExtractTypes(columns); - - NArrow::TArrowBatchBuilder batchBuilder; - batchBuilder.Reserve(1); - bool ok = batchBuilder.Start(columns); - Y_ABORT_UNLESS(ok); - - batchBuilder.AddRow(NKikimr::TDbTupleRef(), NKikimr::TDbTupleRef(types.data(), cells.data(), cells.size())); - - auto batch = batchBuilder.FlushBatch(false); - Y_ABORT_UNLESS(batch); - Y_ABORT_UNLESS(batch->num_columns() == (int)cells.size()); - Y_ABORT_UNLESS(batch->num_rows() == 1); - return NArrow::SerializeBatchNoCompression(batch); -} } using EOperation = NArrow::EOperation; using TPredicate = 
NOlap::TPredicate; -std::pair RangePredicates(const TSerializedTableRange& range, - const std::vector>& columns) { - std::vector leftCells; - std::vector> leftColumns; - bool leftTrailingNull = false; - { - TConstArrayRef cells = range.From.GetCells(); - const size_t size = cells.size(); - Y_ASSERT(size <= columns.size()); - leftCells.reserve(size); - leftColumns.reserve(size); - for (size_t i = 0; i < size; ++i) { - if (!cells[i].IsNull()) { - leftCells.push_back(cells[i]); - leftColumns.push_back(columns[i]); - leftTrailingNull = false; - } else { - leftTrailingNull = true; - } - } - } - - std::vector rightCells; - std::vector> rightColumns; - bool rightTrailingNull = false; - { - TConstArrayRef cells = range.To.GetCells(); - const size_t size = cells.size(); - Y_ASSERT(size <= columns.size()); - rightCells.reserve(size); - rightColumns.reserve(size); - for (size_t i = 0; i < size; ++i) { - if (!cells[i].IsNull()) { - rightCells.push_back(cells[i]); - rightColumns.push_back(columns[i]); - rightTrailingNull = false; - } else { - rightTrailingNull = true; - } - } - } - - const bool fromInclusive = range.FromInclusive || leftTrailingNull; - const bool toInclusive = range.ToInclusive && !rightTrailingNull; - - TString leftBorder = FromCells(leftCells, leftColumns); - TString rightBorder = FromCells(rightCells, rightColumns); - return std::make_pair( - TPredicate(fromInclusive ? EOperation::GreaterEqual : EOperation::Greater, leftBorder, NArrow::MakeArrowSchema(leftColumns)), - TPredicate(toInclusive ? 
EOperation::LessEqual : EOperation::Less, rightBorder, NArrow::MakeArrowSchema(rightColumns))); -} - } diff --git a/ydb/core/tx/columnshard/columnshard_common.h b/ydb/core/tx/columnshard/columnshard_common.h index c2460be231d0..455f39a512cc 100644 --- a/ydb/core/tx/columnshard/columnshard_common.h +++ b/ydb/core/tx/columnshard/columnshard_common.h @@ -1,7 +1,7 @@ #pragma once -#include "defs.h" -#include "engines/reader/description.h" -#include +#include "engines/reader/common/description.h" +#include "engines/predicate/predicate.h" + #include namespace NKikimr::NOlap { @@ -10,13 +10,10 @@ namespace NKikimr::NOlap { namespace NKikimr::NColumnShard { -using TReadDescription = NOlap::TReadDescription; +using TReadDescription = NOlap::NReader::TReadDescription; using IColumnResolver = NOlap::IColumnResolver; using NOlap::TWriteId; -std::pair -RangePredicates(const TSerializedTableRange& range, const std::vector>& columns); - class TBatchCache { public: using TUnifiedBlobId = NOlap::TUnifiedBlobId; diff --git a/ydb/core/tx/columnshard/columnshard_impl.cpp b/ydb/core/tx/columnshard/columnshard_impl.cpp index daa499e55488..dd7aaf540533 100644 --- a/ydb/core/tx/columnshard/columnshard_impl.cpp +++ b/ydb/core/tx/columnshard/columnshard_impl.cpp @@ -1,9 +1,9 @@ #include "columnshard_impl.h" +#include "blob.h" #include "columnshard_schema.h" +#include "common/tablet_id.h" #include "blobs_reader/task.h" #include "blobs_reader/events.h" -#include "engines/changes/ttl.h" -#include "engines/changes/cleanup.h" #include "blobs_action/bs/storage.h" #include "resource_subscriber/task.h" @@ -11,9 +11,27 @@ #include "blobs_action/tier/storage.h" #endif +#include "blobs_reader/actor.h" +#include "blobs_action/storages_manager/manager.h" +#include "blobs_action/transaction/tx_remove_blobs.h" #include "blobs_action/transaction/tx_gc_insert_table.h" #include "blobs_action/transaction/tx_gc_indexed.h" + +#include "data_sharing/destination/session/destination.h" +#include 
"data_sharing/source/session/source.h" +#include "data_sharing/common/transactions/tx_extension.h" + +#include "engines/changes/indexation.h" +#include "engines/changes/cleanup_portions.h" +#include "engines/changes/cleanup_tables.h" +#include "engines/changes/ttl.h" + +#include "export/manager/manager.h" + +#include "resource_subscriber/counters.h" + #include "hooks/abstract/abstract.h" + #include #include #include @@ -22,13 +40,6 @@ #include #include #include -#include "resource_subscriber/counters.h" -#include "blobs_reader/actor.h" - - -#include -#include -#include namespace NKikimr::NColumnShard { @@ -50,38 +61,15 @@ NTabletPipe::TClientConfig GetPipeClientConfig() { } -class TColumnShard::TStoragesManager: public NOlap::IStoragesManager { -private: - using TBase = NOlap::IStoragesManager; - TColumnShard& Shard; -protected: - virtual std::shared_ptr DoBuildOperator(const TString& storageId) override { - if (storageId == TBase::DefaultStorageId) { - return std::make_shared(storageId, Shard.SelfId(), Shard.Info(), Shard.Executor()->Generation()); - } else if (!Shard.Tiers) { - return nullptr; - } else { -#ifndef KIKIMR_DISABLE_S3_OPS - return std::make_shared(storageId, Shard); -#else - return nullptr; -#endif - } - } -public: - TStoragesManager(TColumnShard& shard) - : Shard(shard) { - - } -}; - TColumnShard::TColumnShard(TTabletStorageInfo* info, const TActorId& tablet) : TActor(&TThis::StateInit) , TTabletExecutedFlat(info, tablet, nullptr) , ProgressTxController(std::make_unique(*this)) - , PeriodicWakeupActivationPeriod(GetControllerPeriodicWakeupActivationPeriod()) - , StatsReportInterval(GetControllerStatsReportInterval()) - , StoragesManager(std::make_shared(*this)) + , StoragesManager(std::make_shared(*this)) + , ExportsManager(std::make_shared()) + , DataLocksManager(std::make_shared()) + , 
PeriodicWakeupActivationPeriod(NYDBTest::TControllers::GetColumnShardController()->GetPeriodicWakeupActivationPeriod(TSettings::DefaultPeriodicWakeupActivationPeriod)) + , StatsReportInterval(NYDBTest::TControllers::GetColumnShardController()->GetStatsReportInterval(TSettings::DefaultStatsReportInterval)) , InFlightReadsTracker(StoragesManager) , TablesManager(StoragesManager, info->TabletID) , PipeClientCache(NTabletPipe::CreateBoundedClientCache(new NTabletPipe::TBoundedClientCacheConfig(), GetPipeClientConfig())) @@ -90,7 +78,6 @@ TColumnShard::TColumnShard(TTabletStorageInfo* info, const TActorId& tablet) , InsertTaskSubscription(NOlap::TInsertColumnEngineChanges::StaticTypeName(), SubscribeCounters) , CompactTaskSubscription(NOlap::TCompactColumnEngineChanges::StaticTypeName(), SubscribeCounters) , TTLTaskSubscription(NOlap::TTTLColumnEngineChanges::StaticTypeName(), SubscribeCounters) - , ReadCounters("Read") , ScanCounters("Scan") , WritesMonitor(*this) , NormalizerController(StoragesManager, SubscribeCounters) @@ -102,20 +89,16 @@ TColumnShard::TColumnShard(TTabletStorageInfo* info, const TActorId& tablet) ETxTypes_descriptor >()); TabletCounters = TabletCountersPtr.get(); - - NormalizerController.RegisterNormalizer(std::make_shared()); - NormalizerController.RegisterNormalizer(std::make_shared(Info())); - NormalizerController.RegisterNormalizer(std::make_shared(Info())); + NOlap::TNormalizationController::TInitContext initCtx(Info()); + NormalizerController.InitNormalizers(initCtx); } void TColumnShard::OnDetach(const TActorContext& ctx) { - CleanupActors(ctx); Die(ctx); } void TColumnShard::OnTabletDead(TEvTablet::TEvTabletDead::TPtr& ev, const TActorContext& ctx) { Y_UNUSED(ev); - CleanupActors(ctx); Die(ctx); } @@ -209,7 +192,8 @@ ui64 TColumnShard::GetOutdatedStep() const { } ui64 TColumnShard::GetMinReadStep() const { - ui64 delayMillisec = MaxReadStaleness.MilliSeconds(); + const TDuration maxReadStaleness = 
NYDBTest::TControllers::GetColumnShardController()->GetReadTimeoutClean(TDuration::Minutes(5)); + ui64 delayMillisec = maxReadStaleness.MilliSeconds(); ui64 passedStep = GetOutdatedStep(); ui64 minReadStep = (passedStep > delayMillisec ? passedStep - delayMillisec : 0); return minReadStep; @@ -393,7 +377,7 @@ void TColumnShard::RunEnsureTable(const NKikimrTxColumnShard::TCreateTable& tabl << " ttl settings: " << tableProto.GetTtlSettings() << " at tablet " << TabletID()); - TTableInfo::TTableVersionInfo tableVerProto; + NKikimrTxColumnShard::TTableVersionInfo tableVerProto; tableVerProto.SetPathId(pathId); // check schema changed @@ -414,21 +398,28 @@ void TColumnShard::RunEnsureTable(const NKikimrTxColumnShard::TCreateTable& tabl *tableVerProto.MutableSchema() = tableProto.GetSchema(); } - TTableInfo table(pathId); - if (tableProto.HasTtlSettings()) { - const auto& ttlSettings = tableProto.GetTtlSettings(); - *tableVerProto.MutableTtlSettings() = ttlSettings; - if (ttlSettings.HasUseTiering()) { - table.SetTieringUsage(ttlSettings.GetUseTiering()); - ActivateTiering(pathId, table.GetTieringUsage()); + { + bool needTieringActivation = false; + TTableInfo table(pathId); + if (tableProto.HasTtlSettings()) { + const auto& ttlSettings = tableProto.GetTtlSettings(); + *tableVerProto.MutableTtlSettings() = ttlSettings; + if (ttlSettings.HasUseTiering()) { + table.SetTieringUsage(ttlSettings.GetUseTiering()); + needTieringActivation = true; + } + } + const TString tieringName = table.GetTieringUsage(); + TablesManager.RegisterTable(std::move(table), db); + if (needTieringActivation) { + ActivateTiering(pathId, tieringName); } } tableVerProto.SetSchemaPresetVersionAdj(tableProto.GetSchemaPresetVersionAdj()); tableVerProto.SetTtlSettingsPresetVersionAdj(tableProto.GetTtlSettingsPresetVersionAdj()); - TablesManager.RegisterTable(std::move(table), db); - TablesManager.AddTableVersion(pathId, version, tableVerProto, db); + TablesManager.AddTableVersion(pathId, version, 
tableVerProto, db, Tiers); SetCounter(COUNTER_TABLES, TablesManager.GetTables().size()); SetCounter(COUNTER_TABLE_PRESETS, TablesManager.GetSchemaPresets().size()); @@ -447,7 +438,7 @@ void TColumnShard::RunAlterTable(const NKikimrTxColumnShard::TAlterTable& alterP << " ttl settings: " << alterProto.GetTtlSettings() << " at tablet " << TabletID()); - TTableInfo::TTableVersionInfo tableVerProto; + NKikimrTxColumnShard::TTableVersionInfo tableVerProto; if (alterProto.HasSchemaPreset()) { tableVerProto.SetSchemaPresetId(alterProto.GetSchemaPreset().GetId()); TablesManager.AddSchemaVersion(alterProto.GetSchemaPreset().GetId(), version, alterProto.GetSchemaPreset().GetSchema(), db); @@ -465,7 +456,7 @@ void TColumnShard::RunAlterTable(const NKikimrTxColumnShard::TAlterTable& alterP Schema::SaveTableInfo(db, pathId, tieringUsage); tableVerProto.SetSchemaPresetVersionAdj(alterProto.GetSchemaPresetVersionAdj()); - TablesManager.AddTableVersion(pathId, version, tableVerProto, db); + TablesManager.AddTableVersion(pathId, version, tableVerProto, db, Tiers); } void TColumnShard::RunDropTable(const NKikimrTxColumnShard::TDropTable& dropProto, const NOlap::TSnapshot& version, @@ -513,36 +504,29 @@ void TColumnShard::RunAlterStore(const NKikimrTxColumnShard::TAlterStore& proto, } } -void TColumnShard::EnqueueBackgroundActivities(bool periodic, TBackgroundActivity activity) { +void TColumnShard::EnqueueBackgroundActivities(const bool periodic) { TLogContextGuard gLogging(NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("tablet_id", TabletID())); - ACFL_DEBUG("event", "EnqueueBackgroundActivities")("periodic", periodic)("activity", activity.DebugString()); + ACFL_DEBUG("event", "EnqueueBackgroundActivities")("periodic", periodic); + StoragesManager->GetOperatorVerified(NOlap::IStoragesManager::DefaultStorageId); + StoragesManager->GetSharedBlobsManager()->GetStorageManagerVerified(NOlap::IStoragesManager::DefaultStorageId); CSCounters.OnStartBackground(); - 
SendPeriodicStats(); if (!TablesManager.HasPrimaryIndex()) { LOG_S_NOTICE("Background activities cannot be started: no index at tablet " << TabletID()); return; } +// !!!!!! MUST BE FIRST THROUGH DATA HAVE TO BE SAME IN SESSIONS AFTER TABLET RESTART + SharingSessionsManager->Start(*this); - if (activity.HasIndexation()) { - SetupIndexation(); - } - - if (activity.HasCompaction()) { - SetupCompaction(); - } - - if (activity.HasCleanup()) { - SetupCleanup(); - } - - if (activity.HasTtl()) { - SetupTtl(); - } + ExportsManager->Start(this); + SetupIndexation(); + SetupCompaction(); + SetupCleanupPortions(); + SetupCleanupTables(); + SetupTtl(); SetupGC(); - SetupCleanupInsertTable(); } @@ -553,11 +537,12 @@ class TChangesTask: public NConveyor::ITask { const ui64 TabletId; const TActorId ParentActorId; TString ClassId; + NOlap::TSnapshot LastCompletedTx; protected: virtual bool DoExecute() override { NActors::TLogContextGuard g(NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("tablet_id", TabletId)("parent_id", ParentActorId)); { - NOlap::TConstructionContext context(TxEvent->IndexInfo, Counters); + NOlap::TConstructionContext context(*TxEvent->IndexInfo, Counters, LastCompletedTx); Y_ABORT_UNLESS(TxEvent->IndexChanges->ConstructBlobs(context).Ok()); if (!TxEvent->IndexChanges->GetWritePortionsCount()) { TxEvent->SetPutStatus(NKikimrProto::OK); @@ -571,11 +556,12 @@ class TChangesTask: public NConveyor::ITask { return ClassId; } - TChangesTask(std::unique_ptr&& txEvent, const TIndexationCounters& counters, const ui64 tabletId, const TActorId parentActorId) + TChangesTask(std::unique_ptr&& txEvent, const TIndexationCounters& counters, const ui64 tabletId, const TActorId parentActorId, NOlap::TSnapshot lastCompletedTx) : TxEvent(std::move(txEvent)) , Counters(counters) , TabletId(tabletId) , ParentActorId(parentActorId) + , LastCompletedTx(lastCompletedTx) { Y_ABORT_UNLESS(TxEvent); Y_ABORT_UNLESS(TxEvent->IndexChanges); @@ -590,37 +576,62 @@ class 
TChangesReadTask: public NOlap::NBlobOperations::NRead::ITask { const ui64 TabletId; std::unique_ptr TxEvent; TIndexationCounters Counters; + NOlap::TSnapshot LastCompletedTx; protected: virtual void DoOnDataReady(const std::shared_ptr& resourcesGuard) override { TxEvent->IndexChanges->Blobs = ExtractBlobsData(); TxEvent->IndexChanges->ResourcesGuard = resourcesGuard; const bool isInsert = !!dynamic_pointer_cast(TxEvent->IndexChanges); - std::shared_ptr task = std::make_shared(std::move(TxEvent), Counters, TabletId, ParentActorId); + std::shared_ptr task = std::make_shared(std::move(TxEvent), Counters, TabletId, ParentActorId, LastCompletedTx); if (isInsert) { NConveyor::TInsertServiceOperator::SendTaskToExecute(task); } else { NConveyor::TCompServiceOperator::SendTaskToExecute(task); } } - virtual bool DoOnError(const TBlobRange& range, const NOlap::IBlobsReadingAction::TErrorStatus& status) override { - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "DoOnError")("blob_id", range)("status", status.GetErrorMessage())("status_code", status.GetStatus()); - AFL_VERIFY(false)("blob_id", range)("status", status.GetStatus()); + virtual bool DoOnError(const TString& storageId, const NOlap::TBlobRange& range, const NOlap::IBlobsReadingAction::TErrorStatus& status) override { + AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("event", "DoOnError")("storage_id", storageId)("blob_id", range)("status", status.GetErrorMessage())("status_code", status.GetStatus()); + AFL_VERIFY(status.GetStatus() != NKikimrProto::EReplyStatus::NODATA)("blob_id", range)("status", status.GetStatus())("error", status.GetErrorMessage())("type", TxEvent->IndexChanges->TypeString())("task_id", TxEvent->IndexChanges->GetTaskIdentifier()) + ("debug", TxEvent->IndexChanges->DebugString()); TxEvent->SetPutStatus(NKikimrProto::ERROR); + Counters.ReadErrors->Add(1); TActorContext::AsActorContext().Send(ParentActorId, std::move(TxEvent)); return false; } public: - TChangesReadTask(std::unique_ptr&& event, 
const TActorId parentActorId, const ui64 tabletId, const TIndexationCounters& counters) + TChangesReadTask(std::unique_ptr&& event, const TActorId parentActorId, const ui64 tabletId, const TIndexationCounters& counters, NOlap::TSnapshot lastCompletedTx) : TBase(event->IndexChanges->GetReadingActions(), event->IndexChanges->TypeString(), event->IndexChanges->GetTaskIdentifier()) , ParentActorId(parentActorId) , TabletId(tabletId) , TxEvent(std::move(event)) , Counters(counters) + , LastCompletedTx(lastCompletedTx) { AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "start_changes")("type", TxEvent->IndexChanges->TypeString())("task_id", TxEvent->IndexChanges->GetTaskIdentifier()); } }; +class TInsertChangesReadTask: public TChangesReadTask, public TMonitoringObjectsCounter { +private: + using TBase = TChangesReadTask; +public: + using TBase::TBase; +}; + +class TCompactChangesReadTask: public TChangesReadTask, public TMonitoringObjectsCounter { +private: + using TBase = TChangesReadTask; +public: + using TBase::TBase; +}; + +class TTTLChangesReadTask: public TChangesReadTask, public TMonitoringObjectsCounter { +private: + using TBase = TChangesReadTask; +public: + using TBase::TBase; +}; + void TColumnShard::StartIndexTask(std::vector&& dataToIndex, const i64 bytesToIndex) { CSCounters.IndexationInput(bytesToIndex); @@ -634,9 +645,9 @@ void TColumnShard::StartIndexTask(std::vector&& dat auto indexChanges = TablesManager.MutablePrimaryIndex().StartInsert(std::move(data)); Y_ABORT_UNLESS(indexChanges); - auto actualIndexInfo = TablesManager.GetPrimaryIndex()->GetVersionedIndex(); + auto actualIndexInfo = std::make_shared(TablesManager.GetPrimaryIndex()->GetVersionedIndex()); indexChanges->Start(*this); - auto ev = std::make_unique(std::move(actualIndexInfo), indexChanges, Settings.CacheDataAfterIndexing); + auto ev = std::make_unique(actualIndexInfo, indexChanges, Settings.CacheDataAfterIndexing); const TString externalTaskId = indexChanges->GetTaskIdentifier(); 
AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "indexation")("bytes", bytesToIndex)("blobs_count", dataToIndex.size())("max_limit", (i64)Limits.MaxInsertBytes) @@ -644,11 +655,11 @@ void TColumnShard::StartIndexTask(std::vector&& dat NOlap::NResourceBroker::NSubscribe::ITask::StartResourceSubscription( ResourceSubscribeActor, std::make_shared( - std::make_shared(std::move(ev), SelfId(), TabletID(), IndexationCounters), 0, indexChanges->CalcMemoryForUsage(), externalTaskId, InsertTaskSubscription)); + std::make_shared(std::move(ev), SelfId(), TabletID(), IndexationCounters, GetLastPlannedSnapshot()), 0, indexChanges->CalcMemoryForUsage(), externalTaskId, InsertTaskSubscription)); } void TColumnShard::SetupIndexation() { - if (!AppDataVerified().ColumnShardConfig.GetIndexationEnabled()) { + if (!AppDataVerified().ColumnShardConfig.GetIndexationEnabled() || !NYDBTest::TControllers::GetColumnShardController()->IsBackgroundEnabled(NYDBTest::ICSController::EBackground::Indexation)) { AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "skip_indexation")("reason", "disabled"); return; } @@ -675,7 +686,8 @@ void TColumnShard::SetupIndexation() { AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "start_indexation_tasks")("insert_overload_size", InsertTable->GetCountersCommitted().Bytes); CSCounters.OnSetupIndexation(); - i64 bytesToIndex = 0; + ui64 bytesToIndex = 0; + ui64 txBytesWrite = 0; std::vector dataToIndex; dataToIndex.reserve(TLimits::MIN_SMALL_BLOBS_TO_INSERT); for (auto it = InsertTable->GetPathPriorities().rbegin(); it != InsertTable->GetPathPriorities().rend(); ++it) { @@ -683,11 +695,13 @@ void TColumnShard::SetupIndexation() { for (auto& data : pathInfo->GetCommitted()) { Y_ABORT_UNLESS(data.BlobSize()); bytesToIndex += data.BlobSize(); + txBytesWrite += data.GetTxVolume(); dataToIndex.push_back(&data); - if (bytesToIndex >= Limits.MaxInsertBytes) { + if (bytesToIndex >= (ui64)Limits.MaxInsertBytes || txBytesWrite >= 
NOlap::TGlobalLimits::TxWriteLimitBytes) { StartIndexTask(std::move(dataToIndex), bytesToIndex); dataToIndex.clear(); bytesToIndex = 0; + txBytesWrite = 0; } } } @@ -698,7 +712,7 @@ void TColumnShard::SetupIndexation() { } void TColumnShard::SetupCompaction() { - if (!AppDataVerified().ColumnShardConfig.GetCompactionEnabled()) { + if (!AppDataVerified().ColumnShardConfig.GetCompactionEnabled() || !NYDBTest::TControllers::GetColumnShardController()->IsBackgroundEnabled(NYDBTest::ICSController::EBackground::Compaction)) { AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "skip_compaction")("reason", "disabled"); return; } @@ -706,8 +720,7 @@ void TColumnShard::SetupCompaction() { BackgroundController.CheckDeadlines(); while (BackgroundController.GetCompactionsCount() < TSettings::MAX_ACTIVE_COMPACTIONS) { - auto limits = CompactionLimits.Get(); - auto indexChanges = TablesManager.MutablePrimaryIndex().StartCompaction(limits, BackgroundController.GetConflictCompactionPortions()); + auto indexChanges = TablesManager.MutablePrimaryIndex().StartCompaction(DataLocksManager); if (!indexChanges) { LOG_S_DEBUG("Compaction not started: cannot prepare compaction at tablet " << TabletID()); break; @@ -715,83 +728,101 @@ void TColumnShard::SetupCompaction() { indexChanges->Start(*this); - auto actualIndexInfo = TablesManager.GetPrimaryIndex()->GetVersionedIndex(); - auto ev = std::make_unique(std::move(actualIndexInfo), indexChanges, Settings.CacheDataAfterCompaction); + auto actualIndexInfo = std::make_shared(TablesManager.GetPrimaryIndex()->GetVersionedIndex()); + auto ev = std::make_unique(actualIndexInfo, indexChanges, Settings.CacheDataAfterCompaction); const TString externalTaskId = indexChanges->GetTaskIdentifier(); AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "compaction")("external_task_id", externalTaskId); NOlap::NResourceBroker::NSubscribe::ITask::StartResourceSubscription( ResourceSubscribeActor, std::make_shared( - std::make_shared(std::move(ev), SelfId(), 
TabletID(), CompactionCounters), 0, indexChanges->CalcMemoryForUsage(), externalTaskId, CompactTaskSubscription)); + std::make_shared(std::move(ev), SelfId(), TabletID(), CompactionCounters, GetLastPlannedSnapshot()), 0, indexChanges->CalcMemoryForUsage(), externalTaskId, CompactTaskSubscription)); } LOG_S_DEBUG("ActiveCompactions: " << BackgroundController.GetCompactionsCount() << " at tablet " << TabletID()); } -bool TColumnShard::SetupTtl(const THashMap& pathTtls, const bool force) { - if (!AppDataVerified().ColumnShardConfig.GetTTLEnabled()) { +bool TColumnShard::SetupTtl(const THashMap& pathTtls) { + if (!AppDataVerified().ColumnShardConfig.GetTTLEnabled() || !NYDBTest::TControllers::GetColumnShardController()->IsBackgroundEnabled(NYDBTest::ICSController::EBackground::TTL)) { AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "skip_ttl")("reason", "disabled"); return false; } CSCounters.OnSetupTtl(); - if (BackgroundController.IsTtlActive()) { - ACFL_DEBUG("background", "ttl")("skip_reason", "in_progress"); - return false; - } - if (force) { - TablesManager.MutablePrimaryIndex().OnTieringModified(Tiers, TablesManager.GetTtl()); - } THashMap eviction = pathTtls; for (auto&& i : eviction) { ACFL_DEBUG("background", "ttl")("path", i.first)("info", i.second.GetDebugString()); } - auto actualIndexInfo = TablesManager.GetPrimaryIndex()->GetVersionedIndex(); + auto actualIndexInfo = std::make_shared(TablesManager.GetPrimaryIndex()->GetVersionedIndex()); const ui64 memoryUsageLimit = HasAppData() ? 
AppDataVerified().ColumnShardConfig.GetTieringsMemoryLimit() : ((ui64)512 * 1024 * 1024); - std::shared_ptr indexChanges = TablesManager.MutablePrimaryIndex().StartTtl( - eviction, BackgroundController.GetConflictTTLPortions(), memoryUsageLimit); + std::vector> indexChanges = TablesManager.MutablePrimaryIndex().StartTtl(eviction, DataLocksManager, memoryUsageLimit); - if (!indexChanges) { + if (indexChanges.empty()) { ACFL_DEBUG("background", "ttl")("skip_reason", "no_changes"); return false; } - const TString externalTaskId = indexChanges->GetTaskIdentifier(); - const bool needWrites = indexChanges->NeedConstruction(); - ACFL_DEBUG("background", "ttl")("need_writes", needWrites); - - indexChanges->Start(*this); - auto ev = std::make_unique(std::move(actualIndexInfo), indexChanges, false); - NYDBTest::TControllers::GetColumnShardController()->OnWriteIndexStart(TabletID(), indexChanges->TypeString()); - if (needWrites) { - NOlap::NResourceBroker::NSubscribe::ITask::StartResourceSubscription( - ResourceSubscribeActor, std::make_shared( - std::make_shared(std::move(ev), SelfId(), TabletID(), CompactionCounters), 0, indexChanges->CalcMemoryForUsage(), externalTaskId, TTLTaskSubscription)); - } else { - ev->SetPutStatus(NKikimrProto::OK); - ActorContext().Send(SelfId(), std::move(ev)); + for (auto&& i : indexChanges) { + const TString externalTaskId = i->GetTaskIdentifier(); + const bool needWrites = i->NeedConstruction(); + ACFL_DEBUG("background", "ttl")("need_writes", needWrites); + i->Start(*this); + auto ev = std::make_unique(actualIndexInfo, i, false); + if (needWrites) { + NOlap::NResourceBroker::NSubscribe::ITask::StartResourceSubscription( + ResourceSubscribeActor, std::make_shared( + std::make_shared(std::move(ev), SelfId(), TabletID(), CompactionCounters, GetLastPlannedSnapshot()), 0, i->CalcMemoryForUsage(), externalTaskId, TTLTaskSubscription)); + } else { + ev->SetPutStatus(NKikimrProto::OK); + ActorContext().Send(SelfId(), std::move(ev)); + } } return 
true; } -void TColumnShard::SetupCleanup() { +void TColumnShard::SetupCleanupPortions() { CSCounters.OnSetupCleanup(); - if (BackgroundController.IsCleanupActive()) { + if (!AppDataVerified().ColumnShardConfig.GetCleanupEnabled() || !NYDBTest::TControllers::GetColumnShardController()->IsBackgroundEnabled(NYDBTest::ICSController::EBackground::Cleanup)) { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "skip_cleanup")("reason", "disabled"); + return; + } + if (BackgroundController.IsCleanupPortionsActive()) { ACFL_DEBUG("background", "cleanup")("skip_reason", "in_progress"); return; } NOlap::TSnapshot cleanupSnapshot{GetMinReadStep(), 0}; - auto changes = - TablesManager.MutablePrimaryIndex().StartCleanup(cleanupSnapshot, TablesManager.MutablePathsToDrop(), TLimits::MAX_TX_RECORDS); + auto changes = TablesManager.MutablePrimaryIndex().StartCleanupPortions(cleanupSnapshot, TablesManager.GetPathsToDrop(), DataLocksManager); if (!changes) { ACFL_DEBUG("background", "cleanup")("skip_reason", "no_changes"); return; } ACFL_DEBUG("background", "cleanup")("changes_info", changes->DebugString()); - auto actualIndexInfo = TablesManager.GetPrimaryIndex()->GetVersionedIndex(); - auto ev = std::make_unique(std::move(actualIndexInfo), changes, false); + auto actualIndexInfo = std::make_shared(TablesManager.GetPrimaryIndex()->GetVersionedIndex()); + auto ev = std::make_unique(actualIndexInfo, changes, false); + ev->SetPutStatus(NKikimrProto::OK); // No new blobs to write + + changes->Start(*this); + + Send(SelfId(), ev.release()); +} + +void TColumnShard::SetupCleanupTables() { + CSCounters.OnSetupCleanup(); + if (BackgroundController.IsCleanupTablesActive()) { + ACFL_DEBUG("background", "cleanup")("skip_reason", "in_progress"); + return; + } + + auto changes = TablesManager.MutablePrimaryIndex().StartCleanupTables(TablesManager.MutablePathsToDrop()); + if (!changes) { + ACFL_DEBUG("background", "cleanup")("skip_reason", "no_changes"); + return; + } + + 
ACFL_DEBUG("background", "cleanup")("changes_info", changes->DebugString()); + auto actualIndexInfo = std::make_shared(TablesManager.GetPrimaryIndex()->GetVersionedIndex()); + auto ev = std::make_unique(actualIndexInfo, changes, false); ev->SetPutStatus(NKikimrProto::OK); // No new blobs to write changes->Start(*this); @@ -800,6 +831,10 @@ void TColumnShard::SetupCleanup() { } void TColumnShard::SetupGC() { + if (!NYDBTest::TControllers::GetColumnShardController()->IsBackgroundEnabled(NYDBTest::ICSController::EBackground::GC)) { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "skip_gc")("reason", "disabled"); + return; + } for (auto&& i : StoragesManager->GetStorages()) { i.second->StartGC(); } @@ -825,27 +860,247 @@ void TColumnShard::Die(const TActorContext& ctx) { CleanupActors(ctx); NTabletPipe::CloseAndForgetClient(SelfId(), StatsReportPipe); UnregisterMediatorTimeCast(); + NYDBTest::TControllers::GetColumnShardController()->OnTabletStopped(*this); return IActor::Die(ctx); } +void TColumnShard::Handle(NActors::TEvents::TEvUndelivered::TPtr& ev, const TActorContext&) { + ui32 eventType = ev->Get()->SourceType; + switch (eventType) { + case NOlap::NDataSharing::NEvents::TEvSendDataFromSource::EventType: + case NOlap::NDataSharing::NEvents::TEvAckDataToSource::EventType: + case NOlap::NDataSharing::NEvents::TEvApplyLinksModification::EventType: + case NOlap::NDataSharing::NEvents::TEvStartToSource::EventType: + case NOlap::NDataSharing::NEvents::TEvAckFinishToSource::EventType: + case NOlap::NDataSharing::NEvents::TEvFinishedFromSource::EventType: + SharingSessionsManager->InitializeEventsExchange(*this, ev->Cookie); + break; + } +} + +void TColumnShard::Handle(NOlap::NDataSharing::NEvents::TEvProposeFromInitiator::TPtr& ev, const TActorContext& ctx) { + AFL_NOTICE(NKikimrServices::TX_COLUMNSHARD)("process", "BlobsSharing")("event", "TEvProposeFromInitiator"); + auto reqSession = std::make_shared(); + auto conclusion = 
reqSession->DeserializeDataFromProto(ev->Get()->Record.GetSession(), TablesManager.GetPrimaryIndexAsVerified()); + if (!conclusion) { + if (!reqSession->GetInitiatorController()) { + AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("event", "cannot_parse_start_data_sharing_from_initiator"); + } else { + reqSession->GetInitiatorController().ProposeError(ev->Get()->Record.GetSession().GetSessionId(), conclusion.GetErrorMessage()); + } + return; + } + + auto currentSession = SharingSessionsManager->GetDestinationSession(reqSession->GetSessionId()); + if (currentSession) { + reqSession->GetInitiatorController().ProposeError(ev->Get()->Record.GetSession().GetSessionId(), "Session exists already"); + return; + } + + auto txConclusion = SharingSessionsManager->ProposeDestSession(this, reqSession); + Execute(txConclusion.release(), ctx); +} + +void TColumnShard::Handle(NOlap::NDataSharing::NEvents::TEvConfirmFromInitiator::TPtr& ev, const TActorContext& ctx) { + AFL_NOTICE(NKikimrServices::TX_COLUMNSHARD)("process", "BlobsSharing")("event", "TEvConfirmFromInitiator"); + auto currentSession = SharingSessionsManager->GetDestinationSession(ev->Get()->Record.GetSessionId()); + if (!currentSession) { + AFL_NOTICE(NKikimrServices::TX_COLUMNSHARD)("process", "BlobsSharing")("event", "TEvStartFromInitiator")("problem", "not_exists_session")("session_id", ev->Get()->Record.GetSessionId()); + return; + } + if (currentSession->IsConfirmed()) { + currentSession->GetInitiatorController().ConfirmSuccess(ev->Get()->Record.GetSessionId()); + } else { + + auto txConclusion = SharingSessionsManager->ConfirmDestSession(this, currentSession); + Execute(txConclusion.release(), ctx); + } +} + +void TColumnShard::Handle(NOlap::NDataSharing::NEvents::TEvStartToSource::TPtr& ev, const TActorContext& ctx) { + AFL_NOTICE(NKikimrServices::TX_COLUMNSHARD)("process", "BlobsSharing")("event", "TEvStartToSource"); + auto reqSession = std::make_shared((NOlap::TTabletId)TabletID()); + 
reqSession->DeserializeFromProto(ev->Get()->Record.GetSession(), {}, {}).Validate(); + + auto currentSession = SharingSessionsManager->GetSourceSession(reqSession->GetSessionId()); + if (currentSession) { + AFL_VERIFY(currentSession->IsEqualTo(*reqSession))("session_current", currentSession->DebugString())("session_new", reqSession->DebugString()); + return; + } + + auto txConclusion = SharingSessionsManager->InitializeSourceSession(this, reqSession); + Execute(txConclusion.release(), ctx); +}; + +void TColumnShard::Handle(NOlap::NDataSharing::NEvents::TEvSendDataFromSource::TPtr& ev, const TActorContext& ctx) { + AFL_NOTICE(NKikimrServices::TX_COLUMNSHARD)("process", "BlobsSharing")("event", "TEvSendDataFromSource"); + auto currentSession = SharingSessionsManager->GetDestinationSession(ev->Get()->Record.GetSessionId()); + if (!currentSession) { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "ignore_inactual_sharing_session")("sesion_id", ev->Get()->Record.GetSessionId()); + return; + } + + THashMap dataByPathId; + for (auto&& i : ev->Get()->Record.GetPathIdData()) { + auto schema = TablesManager.GetPrimaryIndexAsVerified().GetVersionedIndex().GetLastSchema(); + AFL_VERIFY(schema); + auto data = NOlap::NDataSharing::NEvents::TPathIdData::BuildFromProto(i, schema->GetIndexInfo()); + AFL_VERIFY(data.IsSuccess())("error", data.GetErrorMessage()); + AFL_VERIFY(dataByPathId.emplace(i.GetPathId(), data.DetachResult()).second); + } + + auto txConclusion = currentSession->ReceiveData(this, dataByPathId, ev->Get()->Record.GetPackIdx(), (NOlap::TTabletId)ev->Get()->Record.GetSourceTabletId(), currentSession); + if (!txConclusion) { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "skip_received_data"); + } else { + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "on_received_data"); + Execute(txConclusion->release(), ctx); + } +}; + +void TColumnShard::Handle(NOlap::NDataSharing::NEvents::TEvAckDataToSource::TPtr& ev, const TActorContext& ctx) { + 
AFL_NOTICE(NKikimrServices::TX_COLUMNSHARD)("process", "BlobsSharing")("event", "TEvAckDataToSource"); + auto currentSession = SharingSessionsManager->GetSourceSession(ev->Get()->Record.GetSessionId()); + if (!currentSession) { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "ignore_inactual_sharing_session")("sesion_id", ev->Get()->Record.GetSessionId()); + return; + } + + auto txConclusion = currentSession->AckData(this, ev->Get()->Record.GetPackIdx(), currentSession); + if (!txConclusion) { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "skip_ack_data"); + } else { + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "on_ack_data"); + Execute(txConclusion->release(), ctx); + } +}; + +void TColumnShard::Handle(NOlap::NDataSharing::NEvents::TEvAckFinishToSource::TPtr& ev, const TActorContext& ctx) { + AFL_NOTICE(NKikimrServices::TX_COLUMNSHARD)("process", "BlobsSharing")("event", "TEvAckFinishToSource"); + auto currentSession = SharingSessionsManager->GetSourceSession(ev->Get()->Record.GetSessionId()); + if (!currentSession) { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "ignore_inactual_sharing_session")("sesion_id", ev->Get()->Record.GetSessionId()); + return; + } + + auto txConclusion = currentSession->AckFinished(this, currentSession); + if (!txConclusion) { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "skip_ack_finish"); + } else { + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "on_ack_finish"); + Execute(txConclusion->release(), ctx); + } +}; + +void TColumnShard::Handle(NOlap::NDataSharing::NEvents::TEvFinishedFromSource::TPtr& ev, const TActorContext& ctx) { + AFL_NOTICE(NKikimrServices::TX_COLUMNSHARD)("process", "BlobsSharing")("event", "TEvFinishedFromSource"); + auto currentSession = SharingSessionsManager->GetDestinationSession(ev->Get()->Record.GetSessionId()); + if (!currentSession) { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "ignore_inactual_sharing_session")("sesion_id", 
ev->Get()->Record.GetSessionId()); + return; + } + + auto txConclusion = currentSession->ReceiveFinished(this, (NOlap::TTabletId)ev->Get()->Record.GetSourceTabletId(), currentSession); + if (!txConclusion) { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "skip_finished_data")("error", txConclusion.GetErrorMessage()); + } else { + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "on_finished_data"); + Execute(txConclusion->release(), ctx); + } +}; + +void TColumnShard::Handle(NOlap::NExport::NEvents::TEvExportSaveCursor::TPtr& ev, const TActorContext& ctx) { + AFL_NOTICE(NKikimrServices::TX_COLUMNSHARD)("process", "Export")("event", "NExport::NEvents::TEvExportSaveCursor"); + auto currentSession = ExportsManager->GetSessionOptional(ev->Get()->GetIdentifier()); + if (!currentSession) { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "ignore_inactual_export_session")("sesion_id", ev->Get()->GetIdentifier().ToString()); + return; + } + + auto txConclusion = currentSession->SaveCursorTx(this, ev->Get()->DetachCursor(), currentSession); + AFL_VERIFY(txConclusion.IsSuccess())("error", txConclusion.GetErrorMessage()); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "on_save_cursor")("id", ev->Get()->GetIdentifier().ToString()); + Execute(txConclusion->release(), ctx); +} + +void TColumnShard::Handle(NOlap::NDataSharing::NEvents::TEvAckFinishFromInitiator::TPtr& ev, const TActorContext& ctx) { + AFL_NOTICE(NKikimrServices::TX_COLUMNSHARD)("process", "BlobsSharing")("event", "TEvAckFinishFromInitiator"); + auto currentSession = SharingSessionsManager->GetDestinationSession(ev->Get()->Record.GetSessionId()); + if (!currentSession) { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "ignore_inactual_sharing_session")("sesion_id", ev->Get()->Record.GetSessionId()); + return; + } + + auto txConclusion = currentSession->AckInitiatorFinished(this, currentSession); + if (!txConclusion) { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", 
"skip_initiator_ack_finished_data"); + } else { + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "on_initiator_ack_finished_data"); + Execute(txConclusion->release(), ctx); + } +}; + +void TColumnShard::Handle(NOlap::NDataSharing::NEvents::TEvApplyLinksModification::TPtr& ev, const TActorContext& ctx) { + AFL_NOTICE(NKikimrServices::TX_COLUMNSHARD)("process", "BlobsSharing")("event", "TEvApplyLinksModification")("info", ev->Get()->Record.DebugString()); + NActors::TLogContextGuard gLogging = NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("tablet_id", TabletID())("event", "TEvChangeBlobsOwning"); + + auto task = std::make_shared((NOlap::TTabletId)TabletID()); + auto parsed = task->DeserializeFromProto(ev->Get()->Record.GetTask()); + AFL_VERIFY(!!parsed)("error", parsed.GetErrorMessage()); + + AFL_VERIFY(task->GetTabletId() == (NOlap::TTabletId)TabletID()); + auto txConclusion = task->BuildModificationTransaction(this, (NOlap::TTabletId)ev->Get()->Record.GetInitiatorTabletId(), ev->Get()->Record.GetSessionId(), ev->Get()->Record.GetPackIdx(), task); + AFL_VERIFY(!!txConclusion)("error", txConclusion.GetErrorMessage()); + Execute(txConclusion->release(), ctx); +} + +void TColumnShard::Handle(NOlap::NDataSharing::NEvents::TEvApplyLinksModificationFinished::TPtr& ev, const TActorContext& ctx) { + AFL_NOTICE(NKikimrServices::TX_COLUMNSHARD)("process", "BlobsSharing")("event", "TEvApplyLinksModificationFinished"); + auto currentSession = SharingSessionsManager->GetSourceSession(ev->Get()->Record.GetSessionId()); + if (!currentSession) { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "ignore_inactual_sharing_session")("sesion_id", ev->Get()->Record.GetSessionId()); + return; + } + const NOlap::TTabletId modifiedTabletId = (NOlap::TTabletId)ev->Get()->Record.GetModifiedTabletId(); + auto txConclusion = currentSession->AckLinks(this, modifiedTabletId, ev->Get()->Record.GetPackIdx(), currentSession); + + if (!txConclusion) { + 
AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "skip_change_links_finish")("error", txConclusion.GetErrorMessage())("tablet_id", modifiedTabletId); + } else { + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "on_change_links_finish")("tablet_id", modifiedTabletId); + Execute(txConclusion->release(), ctx); + } +} + +void TColumnShard::Handle(NOlap::NBlobOperations::NEvents::TEvDeleteSharedBlobs::TPtr& ev, const TActorContext& ctx) { + AFL_NOTICE(NKikimrServices::TX_COLUMNSHARD)("process", "BlobsSharing")("event", "TEvDeleteSharedBlobs"); + NActors::TLogContextGuard gLogging = NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("tablet_id", TabletID())("event", "TEvDeleteSharedBlobs"); + auto removeAction = StoragesManager->GetOperator(ev->Get()->Record.GetStorageId())->StartDeclareRemovingAction(NOlap::NBlobOperations::EConsumer::CLEANUP_SHARED_BLOBS); + for (auto&& i : ev->Get()->Record.GetBlobIds()) { + auto blobId = NOlap::TUnifiedBlobId::BuildFromString(i, nullptr); + AFL_VERIFY(!!blobId)("problem", blobId.GetErrorMessage()); + removeAction->DeclareRemove((NOlap::TTabletId)ev->Get()->Record.GetSourceTabletId(), *blobId); + } + Execute(new TTxRemoveSharedBlobs(this, removeAction, NActors::ActorIdFromProto(ev->Get()->Record.GetSourceActorId())), ctx); +} + void TColumnShard::Handle(NMetadata::NProvider::TEvRefreshSubscriberData::TPtr& ev) { Y_ABORT_UNLESS(Tiers); AFL_INFO(NKikimrServices::TX_COLUMNSHARD)("event", "TEvRefreshSubscriberData")("snapshot", ev->Get()->GetSnapshot()->SerializeToString()); Tiers->TakeConfigs(ev->Get()->GetSnapshot(), nullptr); } -void TColumnShard::ActivateTiering(const ui64 pathId, const TString& useTiering, const bool onTabletInit) { - Y_ABORT_UNLESS(!!Tiers); - if (!!Tiers) { - if (useTiering) { - Tiers->EnablePathId(pathId, useTiering); - } else { - Tiers->DisablePathId(pathId); - } +void TColumnShard::ActivateTiering(const ui64 pathId, const TString& useTiering) { + AFL_VERIFY(Tiers); + if (useTiering) { + 
AFL_INFO(NKikimrServices::TX_COLUMNSHARD)("event", "activate_tiering")("path_id", pathId)("tiering", useTiering); } - if (!onTabletInit) { - OnTieringModified(); + if (useTiering) { + Tiers->EnablePathId(pathId, useTiering); + } else { + Tiers->DisablePathId(pathId); } + OnTieringModified(pathId); } void TColumnShard::Enqueue(STFUNC_SIG) { @@ -858,12 +1113,19 @@ void TColumnShard::Enqueue(STFUNC_SIG) { } } -void TColumnShard::OnTieringModified() { - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "OnTieringModified"); - StoragesManager->OnTieringModified(Tiers); - if (TablesManager.HasPrimaryIndex()) { - TablesManager.MutablePrimaryIndex().OnTieringModified(Tiers, TablesManager.GetTtl()); +void TColumnShard::OnTieringModified(const std::optional pathId) { + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "OnTieringModified")("path_id", pathId); + if (Tiers->IsReady()) { + StoragesManager->OnTieringModified(Tiers); + if (TablesManager.HasPrimaryIndex()) { + TablesManager.MutablePrimaryIndex().OnTieringModified(Tiers, TablesManager.GetTtl(), pathId); + } } } +const NKikimr::NColumnShard::NTiers::TManager* TColumnShard::GetTierManagerPointer(const TString& tierId) const { + Y_ABORT_UNLESS(!!Tiers); + return Tiers->GetManagerOptional(tierId); +} + } diff --git a/ydb/core/tx/columnshard/columnshard_impl.h b/ydb/core/tx/columnshard/columnshard_impl.h index 126e163c8fe3..6132d60043d7 100644 --- a/ydb/core/tx/columnshard/columnshard_impl.h +++ b/ydb/core/tx/columnshard/columnshard_impl.h @@ -6,8 +6,9 @@ #include "columnshard_common.h" #include "columnshard_ttl.h" #include "columnshard_private_events.h" -#include "blob_manager.h" #include "tables_manager.h" + +#include "blobs_action/events/delete_blobs.h" #include "transactions/tx_controller.h" #include "inflight_request_tracker.h" #include "counters/columnshard.h" @@ -15,6 +16,17 @@ #include "resource_subscriber/task.h" #include "normalizer/abstract/abstract.h" +#include "export/events/events.h" + +#include 
"data_sharing/destination/events/control.h" +#include "data_sharing/source/events/control.h" +#include "data_sharing/destination/events/transfer.h" +#include "data_sharing/source/events/transfer.h" +#include "data_sharing/manager/sessions.h" +#include "data_sharing/manager/shared_blobs.h" +#include "data_sharing/common/transactions/tx_extension.h" +#include "data_sharing/modification/events/change_owning.h" + #include #include #include @@ -22,17 +34,38 @@ #include #include #include -#include #include #include #include +#include namespace NKikimr::NOlap { -class TCleanupColumnEngineChanges; +class TCleanupPortionsColumnEngineChanges; +class TCleanupTablesColumnEngineChanges; class TTTLColumnEngineChanges; class TChangesWithAppend; class TCompactColumnEngineChanges; class TInsertColumnEngineChanges; +class TStoragesManager; + +namespace NReader { +class TTxScan; +namespace NPlain { +class TIndexScannerConstructor; +} +} + +namespace NDataSharing { +class TTxDataFromSource; +class TTxDataAckToSource; +class TTxFinishAckToSource; +class TTxFinishAckFromInitiator; +} + +namespace NExport { +class TExportsManager; +} + namespace NBlobOperations { namespace NBlobStorage { class TWriteAction; @@ -49,7 +82,9 @@ class TGeneralCompactColumnEngineChanges; namespace NKikimr::NColumnShard { + class TTxInsertTableCleanup; +class TTxRemoveSharedBlobs; class TOperationsManager; extern bool gAllowLogBatchingDefaultValue; @@ -105,7 +140,6 @@ class TColumnShard friend class TTxWrite; friend class TTxReadBase; friend class TTxRead; - friend class TTxScan; friend class TTxWriteIndex; friend class TTxExportFinish; friend class TTxRunGC; @@ -113,8 +147,10 @@ class TColumnShard friend class TTxReadBlobRanges; friend class TTxApplyNormalizer; friend class TTxMonitoring; + friend class TTxRemoveSharedBlobs; - friend class NOlap::TCleanupColumnEngineChanges; + friend class NOlap::TCleanupPortionsColumnEngineChanges; + friend class NOlap::TCleanupTablesColumnEngineChanges; friend class 
NOlap::TTTLColumnEngineChanges; friend class NOlap::TChangesWithAppend; friend class NOlap::TCompactColumnEngineChanges; @@ -125,6 +161,16 @@ class TColumnShard friend class NOlap::NBlobOperations::NBlobStorage::TOperator; friend class NOlap::NBlobOperations::NTier::TOperator; + friend class NOlap::NDataSharing::TTxDataFromSource; + friend class NOlap::NDataSharing::TTxDataAckToSource; + friend class NOlap::NDataSharing::TTxFinishAckToSource; + friend class NOlap::NDataSharing::TTxFinishAckFromInitiator; + + friend class NOlap::TStoragesManager; + + friend class NOlap::NReader::TTxScan; + friend class NOlap::NReader::NPlain::TIndexScannerConstructor; + class TStoragesManager; friend class TTxController; @@ -134,6 +180,7 @@ class TColumnShard friend class TSchemaTransactionOperator; friend class TLongTxTransactionOperator; friend class TEvWriteTransactionOperator; + friend class TBackupTransactionOperator; class TTxProgressTx; class TTxProposeCancel; @@ -163,6 +210,24 @@ class TColumnShard void Handle(TEvPrivate::TEvTieringModified::TPtr& ev, const TActorContext&); void Handle(TEvPrivate::TEvNormalizerResult::TPtr& ev, const TActorContext&); + void Handle(NActors::TEvents::TEvUndelivered::TPtr& ev, const TActorContext&); + + void Handle(NOlap::NBlobOperations::NEvents::TEvDeleteSharedBlobs::TPtr& ev, const TActorContext& ctx); + + void Handle(NOlap::NDataSharing::NEvents::TEvApplyLinksModification::TPtr& ev, const TActorContext& ctx); + void Handle(NOlap::NDataSharing::NEvents::TEvApplyLinksModificationFinished::TPtr& ev, const TActorContext& ctx); + + void Handle(NOlap::NDataSharing::NEvents::TEvProposeFromInitiator::TPtr& ev, const TActorContext& ctx); + void Handle(NOlap::NDataSharing::NEvents::TEvConfirmFromInitiator::TPtr& ev, const TActorContext& ctx); + void Handle(NOlap::NDataSharing::NEvents::TEvStartToSource::TPtr& ev, const TActorContext& ctx); + void Handle(NOlap::NDataSharing::NEvents::TEvSendDataFromSource::TPtr& ev, const TActorContext& ctx); + void 
Handle(NOlap::NDataSharing::NEvents::TEvAckDataToSource::TPtr& ev, const TActorContext& ctx); + void Handle(NOlap::NDataSharing::NEvents::TEvFinishedFromSource::TPtr& ev, const TActorContext& ctx); + void Handle(NOlap::NDataSharing::NEvents::TEvAckFinishToSource::TPtr& ev, const TActorContext& ctx); + void Handle(NOlap::NDataSharing::NEvents::TEvAckFinishFromInitiator::TPtr& ev, const TActorContext& ctx); + + void Handle(NOlap::NExport::NEvents::TEvExportSaveCursor::TPtr& ev, const TActorContext& ctx); + ITransaction* CreateTxInitSchema(); void OnActivateExecutor(const TActorContext& ctx) override; @@ -175,10 +240,7 @@ class TColumnShard Y_UNUSED(ctx); } - const NTiers::TManager* GetTierManagerPointer(const TString& tierId) const { - Y_ABORT_UNLESS(!!Tiers); - return Tiers->GetManagerOptional(tierId); - } + const NTiers::TManager* GetTierManagerPointer(const TString& tierId) const; void Die(const TActorContext& ctx) override; @@ -206,24 +268,21 @@ class TColumnShard TabletCounters->Cumulative()[counter].Increment(num); } - void IncCounter(NColumnShard::EPercentileCounters counter, const TDuration& latency) const { - TabletCounters->Percentile()[counter].IncrementFor(latency.MicroSeconds()); - } - - void ActivateTiering(const ui64 pathId, const TString& useTiering, const bool onTabletInit = false); - void OnTieringModified(); + void ActivateTiering(const ui64 pathId, const TString& useTiering); + void OnTieringModified(const std::optional pathId = {}); public: enum class EOverloadStatus { ShardTxInFly /* "shard_tx" */, ShardWritesInFly /* "shard_writes" */, ShardWritesSizeInFly /* "shard_writes_size" */, InsertTable /* "insert_table" */, + OverloadMetadata /* "overload_metadata" */, Disk /* "disk" */, None /* "none" */ }; private: - void OverloadWriteFail(const EOverloadStatus overloadReason, const NEvWrite::TWriteData& writeData, std::unique_ptr&& event, const TActorContext& ctx); + void OverloadWriteFail(const EOverloadStatus overloadReason, const 
NEvWrite::TWriteData& writeData, const ui64 cookie, std::unique_ptr&& event, const TActorContext& ctx); EOverloadStatus CheckOverloaded(const ui64 tableId) const; protected: @@ -240,7 +299,7 @@ class TColumnShard LOG_S_WARN("TColumnShard.StateBroken at " << TabletID() << " unhandled event type: " << ev->GetTypeRewrite() << " event: " << ev->ToString()); - Send(IEventHandle::ForwardOnNondelivery(std::move(ev), TEvents::TEvUndelivered::ReasonActorUnknown)); + Send(IEventHandle::ForwardOnNondelivery(std::move(ev), NActors::TEvents::TEvUndelivered::ReasonActorUnknown)); break; } } @@ -273,6 +332,23 @@ class TColumnShard HFunc(TEvPrivate::TEvWriteDraft, Handle); HFunc(TEvPrivate::TEvGarbageCollectionFinished, Handle); HFunc(TEvPrivate::TEvTieringModified, Handle); + + HFunc(NActors::TEvents::TEvUndelivered, Handle); + + HFunc(NOlap::NBlobOperations::NEvents::TEvDeleteSharedBlobs, Handle); + HFunc(NOlap::NDataSharing::NEvents::TEvApplyLinksModification, Handle); + HFunc(NOlap::NDataSharing::NEvents::TEvApplyLinksModificationFinished, Handle); + + HFunc(NOlap::NDataSharing::NEvents::TEvProposeFromInitiator, Handle); + HFunc(NOlap::NDataSharing::NEvents::TEvConfirmFromInitiator, Handle); + HFunc(NOlap::NDataSharing::NEvents::TEvStartToSource, Handle); + HFunc(NOlap::NDataSharing::NEvents::TEvSendDataFromSource, Handle); + HFunc(NOlap::NDataSharing::NEvents::TEvAckDataToSource, Handle); + HFunc(NOlap::NDataSharing::NEvents::TEvFinishedFromSource, Handle); + HFunc(NOlap::NDataSharing::NEvents::TEvAckFinishToSource, Handle); + HFunc(NOlap::NDataSharing::NEvents::TEvAckFinishFromInitiator, Handle); + + HFunc(NOlap::NExport::NEvents::TEvExportSaveCursor, Handle); default: if (!HandleDefaultEvents(ev, SelfId())) { LOG_S_WARN("TColumnShard.StateWork at " << TabletID() @@ -286,6 +362,10 @@ class TColumnShard private: std::unique_ptr ProgressTxController; std::unique_ptr OperationsManager; + std::shared_ptr SharingSessionsManager; + std::shared_ptr StoragesManager; + 
std::shared_ptr ExportsManager; + std::shared_ptr DataLocksManager; using TSchemaPreset = TSchemaPreset; using TTableInfo = TTableInfo; @@ -356,14 +436,12 @@ class TColumnShard ui64 LastExportNo = 0; ui64 OwnerPathId = 0; - ui64 TabletTxCounter = 0; ui64 StatsReportRound = 0; TString OwnerPath; TIntrusivePtr MediatorTimeCastEntry; bool MediatorTimeCastRegistered = false; TSet MediatorTimeCastWaitingSteps; - TDuration MaxReadStaleness = TDuration::Minutes(5); // TODO: Make configurable? const TDuration PeriodicWakeupActivationPeriod; TDuration FailActivationDelay = TDuration::Seconds(1); const TDuration StatsReportInterval; @@ -374,7 +452,6 @@ class TColumnShard TActorId BufferizationWriteActorId; TActorId StatsReportPipe; - std::shared_ptr StoragesManager; TInFlightReadsTracker InFlightReadsTracker; TTablesManager TablesManager; std::shared_ptr Tiers; @@ -386,7 +463,6 @@ class TColumnShard NOlap::NResourceBroker::NSubscribe::TTaskContext InsertTaskSubscription; NOlap::NResourceBroker::NSubscribe::TTaskContext CompactTaskSubscription; NOlap::NResourceBroker::NSubscribe::TTaskContext TTLTaskSubscription; - const TScanCounters ReadCounters; const TScanCounters ScanCounters; const TIndexationCounters CompactionCounters = TIndexationCounters("GeneralCompaction"); const TIndexationCounters IndexationCounters = TIndexationCounters("Indexation"); @@ -394,7 +470,6 @@ class TColumnShard const TCSCounters CSCounters; TWritesMonitor WritesMonitor; - bool ProgressTxInFlight = false; THashMap ScanTxInFlight; THashMap LongTxWrites; @@ -404,7 +479,6 @@ class TColumnShard TBackgroundController BackgroundController; TSettings Settings; TLimits Limits; - TCompactionLimits CompactionLimits; NOlap::TNormalizationController NormalizerController; void TryRegisterMediatorTimeCast(); @@ -416,6 +490,10 @@ class TColumnShard NOlap::TSnapshot GetMaxReadVersion() const; ui64 GetMinReadStep() const; ui64 GetOutdatedStep() const; + TDuration GetTxCompleteLag() const { + ui64 mediatorTime = 
MediatorTimeCastEntry ? MediatorTimeCastEntry->Get(TabletID()) : 0; + return ProgressTxController->GetTxCompleteLag(mediatorTime); + } TWriteId HasLongTxWrite(const NLongTxService::TLongTxId& longTxId, const ui32 partId); TWriteId GetLongTxWrite(NIceDb::TNiceDb& db, const NLongTxService::TLongTxId& longTxId, const ui32 partId); @@ -428,7 +506,7 @@ class TColumnShard TWriteId BuildNextWriteId(NIceDb::TNiceDb& db); void EnqueueProgressTx(const TActorContext& ctx); - void EnqueueBackgroundActivities(bool periodic = false, TBackgroundActivity activity = TBackgroundActivity::All()); + void EnqueueBackgroundActivities(const bool periodic = false); virtual void Enqueue(STFUNC_SIG) override; void UpdateSchemaSeqNo(const TMessageSeqNo& seqNo, NTabletFlatExecutor::TTransactionContext& txc); @@ -444,8 +522,9 @@ class TColumnShard void StartIndexTask(std::vector&& dataToIndex, const i64 bytesToIndex); void SetupIndexation(); void SetupCompaction(); - bool SetupTtl(const THashMap& pathTtls = {}, const bool force = false); - void SetupCleanup(); + bool SetupTtl(const THashMap& pathTtls = {}); + void SetupCleanupPortions(); + void SetupCleanupTables(); void SetupCleanupInsertTable(); void SetupGC(); @@ -460,14 +539,55 @@ class TColumnShard void ConfigureStats(const NOlap::TColumnEngineStats& indexStats, ::NKikimrTableStats::TTableStats* tabletStats); void FillTxTableStats(::NKikimrTableStats::TTableStats* tableStats) const; - static TDuration GetControllerPeriodicWakeupActivationPeriod(); - static TDuration GetControllerStatsReportInterval(); - public: + ui64 TabletTxCounter = 0; + + template + const T& GetIndexAs() const { + return TablesManager.GetPrimaryIndexAsVerified(); + } + + const NOlap::IColumnEngine* GetIndexOptional() const { + return TablesManager.GetPrimaryIndex() ? 
TablesManager.GetPrimaryIndex().get() : nullptr; + } + + template + T& MutableIndexAs() { + return TablesManager.MutablePrimaryIndexAsVerified(); + } + + TTxController& GetProgressTxController() const { + AFL_VERIFY(ProgressTxController); + return *ProgressTxController; + } + + bool HasIndex() const { + return !!TablesManager.GetPrimaryIndex(); + } + + NOlap::TSnapshot GetLastPlannedSnapshot() const { + return NOlap::TSnapshot(LastPlannedStep, LastPlannedTxId); + } + + const std::shared_ptr& GetExportsManager() const { + return ExportsManager; + } + const std::shared_ptr& GetStoragesManager() const { + AFL_VERIFY(StoragesManager); return StoragesManager; } + const std::shared_ptr& GetDataLocksManager() const { + AFL_VERIFY(DataLocksManager); + return DataLocksManager; + } + + const NOlap::TInsertTable& GetInsertTable() const { + AFL_VERIFY(!!InsertTable); + return *InsertTable; + } + static constexpr NKikimrServices::TActivity::EType ActorActivityType() { return NKikimrServices::TActivity::TX_COLUMNSHARD_ACTOR; } diff --git a/ydb/core/tx/columnshard/columnshard_private_events.h b/ydb/core/tx/columnshard/columnshard_private_events.h index e258f724fcf3..70c1870aa34a 100644 --- a/ydb/core/tx/columnshard/columnshard_private_events.h +++ b/ydb/core/tx/columnshard/columnshard_private_events.h @@ -1,6 +1,5 @@ #pragma once -#include "blob_manager.h" #include "blobs_action/abstract/gc.h" #include "defs.h" @@ -37,6 +36,11 @@ struct TEvPrivate { EvWritingAddDataToBuffer, EvWritingFlushBuffer, + EvExportWritingFinished, + EvExportWritingFailed, + EvExportCursorSaved, + EvExportSaveCursor, + EvEnd }; @@ -76,7 +80,7 @@ struct TEvPrivate { /// Common event for Indexing and GranuleCompaction: write index data in TTxWriteIndex transaction. 
struct TEvWriteIndex : public TEventLocal { - NOlap::TVersionedIndex IndexInfo; + std::shared_ptr IndexInfo; std::shared_ptr IndexChanges; bool GranuleCompaction{false}; TUsage ResourceUsage; @@ -84,10 +88,10 @@ struct TEvPrivate { TDuration Duration; TBlobPutResult::TPtr PutResult; - TEvWriteIndex(NOlap::TVersionedIndex&& indexInfo, + TEvWriteIndex(const std::shared_ptr& indexInfo, std::shared_ptr indexChanges, bool cacheData) - : IndexInfo(std::move(indexInfo)) + : IndexInfo(indexInfo) , IndexChanges(indexChanges) , CacheData(cacheData) { diff --git a/ydb/core/tx/columnshard/columnshard_schema.h b/ydb/core/tx/columnshard/columnshard_schema.h index 1690fa0bfec7..25d7c11fcca4 100644 --- a/ydb/core/tx/columnshard/columnshard_schema.h +++ b/ydb/core/tx/columnshard/columnshard_schema.h @@ -38,13 +38,27 @@ struct Schema : NIceDb::Schema { ColumnsTableId, CountersTableId, OperationsTableId, - IndexesTableId + IndexesTableId, + + LocksTableId, + LockRangesTableId, + LockConflictsTableId, + LockVolatileDependenciesTableId, + + SharedBlobIdsTableId, + BorrowedBlobIdsTableId, + SourceSessionsTableId, + DestinationSessionsTableId, + OperationTxIdsId, + BackupIdsDeprecated, + ExportSessionsId }; enum class ETierTables: ui32 { TierBlobsDraft = 1024, TierBlobsToKeep, - TierBlobsToDelete + TierBlobsToDelete, + TierBlobsToDeleteWT }; enum class EValueIds : ui32 { @@ -60,6 +74,9 @@ struct Schema : NIceDb::Schema { LastExportNumber = 10, OwnerPathId = 11, OwnerPath = 12, + LastCompletedStep = 13, + LastCompletedTxId = 14, + LastNormalizerSequentialId = 15, }; enum class EInsertTableIds : ui8 { @@ -68,9 +85,26 @@ struct Schema : NIceDb::Schema { Aborted = 2, }; + enum class ECommonTables { + Value = 1, + TxInfo = 2, + SchemaPresetInfo = 3, + TtlSettingsPresetInfo = 4, + TableInfo = 5, + LongTxWrites = 6, + BlobsToKeep = 7, + BlobsToDelete = 8, + SchemaPresetVersionInfo = 9, + TtlSettingsPresetVersionInfo = 10, + TableVersionInfo = 11, + SmallBlobs = 12, + OneToOneEvictedBlobs = 13, 
+ BlobsToDeleteWT = 14 + }; + // Tablet tables - struct Value : Table<1> { + struct Value : Table<(ui32)ECommonTables::Value> { struct Id : Column<1, NScheme::NTypeIds::Uint32> {}; // one of EValueIds struct Digit : Column<2, NScheme::NTypeIds::Uint64> {}; struct Bytes : Column<3, NScheme::NTypeIds::String> {}; @@ -79,7 +113,7 @@ struct Schema : NIceDb::Schema { using TColumns = TableColumns; }; - struct TxInfo : Table<2> { + struct TxInfo : Table<(ui32)ECommonTables::TxInfo> { struct TxId : Column<1, NScheme::NTypeIds::Uint64> {}; struct TxKind : Column<2, NScheme::NTypeIds::Uint32> { using Type = NKikimrTxColumnShard::ETransactionKind; }; struct TxBody : Column<3, NScheme::NTypeIds::String> {}; @@ -92,7 +126,7 @@ struct Schema : NIceDb::Schema { using TColumns = TableColumns; }; - struct SchemaPresetInfo : Table<3> { + struct SchemaPresetInfo : Table<(ui32)ECommonTables::SchemaPresetInfo> { struct Id : Column<1, NScheme::NTypeIds::Uint32> {}; struct Name : Column<2, NScheme::NTypeIds::Utf8> {}; struct DropStep : Column<3, NScheme::NTypeIds::Uint64> {}; @@ -102,7 +136,7 @@ struct Schema : NIceDb::Schema { using TColumns = TableColumns; }; - struct SchemaPresetVersionInfo : Table<9> { + struct SchemaPresetVersionInfo : Table<(ui32)ECommonTables::SchemaPresetVersionInfo> { struct Id : Column<1, NScheme::NTypeIds::Uint32> {}; struct SinceStep : Column<2, NScheme::NTypeIds::Uint64> {}; struct SinceTxId : Column<3, NScheme::NTypeIds::Uint64> {}; @@ -112,7 +146,7 @@ struct Schema : NIceDb::Schema { using TColumns = TableColumns; }; - struct TtlSettingsPresetInfo : Table<4> { + struct TtlSettingsPresetInfo : Table<(ui32)ECommonTables::TtlSettingsPresetInfo> { struct Id : Column<1, NScheme::NTypeIds::Uint32> {}; struct Name : Column<2, NScheme::NTypeIds::Utf8> {}; struct DropStep : Column<3, NScheme::NTypeIds::Uint64> {}; @@ -122,7 +156,7 @@ struct Schema : NIceDb::Schema { using TColumns = TableColumns; }; - struct TtlSettingsPresetVersionInfo : Table<10> { + struct 
TtlSettingsPresetVersionInfo : Table<(ui32)ECommonTables::TtlSettingsPresetVersionInfo> { struct Id : Column<1, NScheme::NTypeIds::Uint32> {}; struct SinceStep : Column<2, NScheme::NTypeIds::Uint64> {}; struct SinceTxId : Column<3, NScheme::NTypeIds::Uint64> {}; @@ -132,7 +166,7 @@ struct Schema : NIceDb::Schema { using TColumns = TableColumns; }; - struct TableInfo : Table<5> { + struct TableInfo : Table<(ui32)ECommonTables::TableInfo> { struct PathId : Column<1, NScheme::NTypeIds::Uint64> {}; struct DropStep : Column<2, NScheme::NTypeIds::Uint64> {}; struct DropTxId : Column<3, NScheme::NTypeIds::Uint64> {}; @@ -142,7 +176,7 @@ struct Schema : NIceDb::Schema { using TColumns = TableColumns; }; - struct TableVersionInfo : Table<11> { + struct TableVersionInfo : Table<(ui32)ECommonTables::TableVersionInfo> { struct PathId : Column<1, NScheme::NTypeIds::Uint64> {}; struct SinceStep : Column<2, NScheme::NTypeIds::Uint64> {}; struct SinceTxId : Column<3, NScheme::NTypeIds::Uint64> {}; @@ -152,7 +186,7 @@ struct Schema : NIceDb::Schema { using TColumns = TableColumns; }; - struct LongTxWrites : Table<6> { + struct LongTxWrites : Table<(ui32)ECommonTables::LongTxWrites> { struct WriteId: Column<1, NScheme::NTypeIds::Uint64> {}; struct LongTxId : Column<2, NScheme::NTypeIds::String> {}; struct WritePartId: Column<3, NScheme::NTypeIds::Uint32> {}; @@ -161,21 +195,21 @@ struct Schema : NIceDb::Schema { using TColumns = TableColumns; }; - struct BlobsToKeep : Table<7> { + struct BlobsToKeep : Table<(ui32)ECommonTables::BlobsToKeep> { struct BlobId : Column<1, NScheme::NTypeIds::String> {}; using TKey = TableKey; using TColumns = TableColumns; }; - struct BlobsToDelete : Table<8> { - struct BlobId : Column<1, NScheme::NTypeIds::String> {}; + struct BlobsToDelete: Table<(ui32)ECommonTables::BlobsToDelete> { + struct BlobId: Column<1, NScheme::NTypeIds::String> {}; using TKey = TableKey; using TColumns = TableColumns; }; - struct SmallBlobs : Table<12> { + struct SmallBlobs : 
Table<(ui32)ECommonTables::SmallBlobs> { struct BlobId : Column<1, NScheme::NTypeIds::String> {}; struct Data : Column<2, NScheme::NTypeIds::String> {}; @@ -183,7 +217,7 @@ struct Schema : NIceDb::Schema { using TColumns = TableColumns; }; - struct OneToOneEvictedBlobs : Table<13> { + struct OneToOneEvictedBlobs : Table<(ui32)ECommonTables::OneToOneEvictedBlobs> { struct BlobId : Column<1, NScheme::NTypeIds::String> {}; struct Size : Column<2, NScheme::NTypeIds::Uint32> {}; // extracted from BlobId for better introspection struct State : Column<3, NScheme::NTypeIds::Byte> {}; // evicting -> (self) cached <-> exported @@ -197,6 +231,14 @@ struct Schema : NIceDb::Schema { using TColumns = TableColumns; }; + struct BlobsToDeleteWT: Table<(ui32)ECommonTables::BlobsToDeleteWT> { + struct BlobId: Column<1, NScheme::NTypeIds::String> {}; + struct TabletId: Column<2, NScheme::NTypeIds::Uint64> {}; + + using TKey = TableKey; + using TColumns = TableColumns; + }; + // Index tables // InsertTable - common for all indices @@ -264,14 +306,33 @@ struct Schema : NIceDb::Schema { struct Operations : NIceDb::Schema::Table { struct WriteId : Column<1, NScheme::NTypeIds::Uint64> {}; - struct TxId : Column<2, NScheme::NTypeIds::Uint64> {}; + struct LockId : Column<2, NScheme::NTypeIds::Uint64> {}; struct Status : Column<3, NScheme::NTypeIds::Uint32> {}; struct CreatedAt : Column<4, NScheme::NTypeIds::Uint64> {}; struct GlobalWriteId : Column<5, NScheme::NTypeIds::Uint64> {}; struct Metadata : Column<6, NScheme::NTypeIds::String> {}; + struct Cookie : Column<7, NScheme::NTypeIds::Uint64> {}; using TKey = TableKey; - using TColumns = TableColumns; + using TColumns = TableColumns; + }; + + struct OperationTxIds : NIceDb::Schema::Table { + struct TxId : Column<1, NScheme::NTypeIds::Uint64> {}; + struct LockId : Column<2, NScheme::NTypeIds::Uint64> {}; + + using TKey = TableKey; + using TColumns = TableColumns; + }; + + struct ExportPersistentSessions : NIceDb::Schema::Table { + struct 
Identifier : Column<1, NScheme::NTypeIds::String> {}; + struct Status: Column<2, NScheme::NTypeIds::String> {}; + struct Task: Column<3, NScheme::NTypeIds::String> {}; + struct Cursor: Column<4, NScheme::NTypeIds::String> {}; + + using TKey = TableKey; + using TColumns = TableColumns; }; struct TierBlobsDraft: NIceDb::Schema::Table<(ui32)ETierTables::TierBlobsDraft> { @@ -290,6 +351,15 @@ struct Schema : NIceDb::Schema { using TColumns = TableColumns; }; + struct TierBlobsToDeleteWT: NIceDb::Schema::Table<(ui32)ETierTables::TierBlobsToDeleteWT> { + struct StorageId: Column<1, NScheme::NTypeIds::String> {}; + struct BlobId: Column<2, NScheme::NTypeIds::String> {}; + struct TabletId: Column<3, NScheme::NTypeIds::Uint64> {}; + + using TKey = TableKey; + using TColumns = TableColumns; + }; + struct IndexIndexes: NIceDb::Schema::Table { struct PathId: Column<1, NScheme::NTypeIds::Uint64> {}; struct PortionId: Column<2, NScheme::NTypeIds::Uint64> {}; @@ -305,6 +375,83 @@ struct Schema : NIceDb::Schema { using TColumns = TableColumns; }; + struct SharedBlobIds: NIceDb::Schema::Table { + struct StorageId: Column<1, NScheme::NTypeIds::String> {}; + struct BlobId: Column<2, NScheme::NTypeIds::String> {}; + struct TabletId: Column<3, NScheme::NTypeIds::Uint64> {}; + + using TKey = TableKey; + using TColumns = TableColumns; + }; + + struct BorrowedBlobIds: NIceDb::Schema::Table { + struct StorageId: Column<1, NScheme::NTypeIds::String> {}; + struct BlobId: Column<2, NScheme::NTypeIds::String> {}; + struct TabletId: Column<3, NScheme::NTypeIds::Uint64> {}; + + using TKey = TableKey; + using TColumns = TableColumns; + }; + + struct SourceSessions: NIceDb::Schema::Table { + struct SessionId: Column<1, NScheme::NTypeIds::String> {}; + struct Details: Column<2, NScheme::NTypeIds::String> {}; + struct CursorDynamic: Column<3, NScheme::NTypeIds::String> {}; + struct CursorStatic: Column<4, NScheme::NTypeIds::String> {}; + + using TKey = TableKey; + using TColumns = TableColumns; + }; 
+ + struct DestinationSessions: NIceDb::Schema::Table { + struct SessionId: Column<1, NScheme::NTypeIds::String> {}; + struct Details: Column<2, NScheme::NTypeIds::String> {}; + struct Cursor: Column<3, NScheme::NTypeIds::String> {}; + + using TKey = TableKey; + using TColumns = TableColumns; + }; + + struct Locks : Table { + struct LockId : Column<1, NScheme::NTypeIds::Uint64> {}; + struct LockNodeId : Column<2, NScheme::NTypeIds::Uint32> {}; + struct Generation : Column<3, NScheme::NTypeIds::Uint32> {}; + struct Counter : Column<4, NScheme::NTypeIds::Uint64> {}; + struct CreateTimestamp : Column<5, NScheme::NTypeIds::Uint64> {}; + struct Flags : Column<6, NScheme::NTypeIds::Uint64> {}; + + using TKey = TableKey; + using TColumns = TableColumns; + }; + + struct LockRanges : Table { + struct LockId : Column<1, NScheme::NTypeIds::Uint64> {}; + struct RangeId : Column<2, NScheme::NTypeIds::Uint64> {}; + struct PathOwnerId : Column<3, NScheme::NTypeIds::Uint64> {}; + struct LocalPathId : Column<4, NScheme::NTypeIds::Uint64> {}; + struct Flags : Column<5, NScheme::NTypeIds::Uint64> {}; + struct Data : Column<6, NScheme::NTypeIds::String> {}; + + using TKey = TableKey; + using TColumns = TableColumns; + }; + + struct LockConflicts : Table { + struct LockId : Column<1, NScheme::NTypeIds::Uint64> {}; + struct ConflictId : Column<2, NScheme::NTypeIds::Uint64> {}; + + using TKey = TableKey; + using TColumns = TableColumns; + }; + + struct LockVolatileDependencies : Table { + struct LockId : Column<1, NScheme::NTypeIds::Uint64> {}; + struct TxId : Column<2, NScheme::NTypeIds::Uint64> {}; + + using TKey = TableKey; + using TColumns = TableColumns; + }; + using TTables = SchemaTables< Value, TxInfo, @@ -317,6 +464,7 @@ struct Schema : NIceDb::Schema { LongTxWrites, BlobsToKeep, BlobsToDelete, + BlobsToDeleteWT, InsertTable, IndexGranules, IndexColumns, @@ -326,7 +474,14 @@ struct Schema : NIceDb::Schema { Operations, TierBlobsDraft, TierBlobsToDelete, - IndexIndexes + 
TierBlobsToDeleteWT, + IndexIndexes, + SharedBlobIds, + BorrowedBlobIds, + SourceSessions, + DestinationSessions, + OperationTxIds, + ExportPersistentSessions >; // @@ -347,8 +502,23 @@ struct Schema : NIceDb::Schema { auto rowset = db.Table().Key((ui32)key).Select(); if (rowset.IsReady()) { - if (rowset.IsValid()) + if (rowset.IsValid()) { + value = T{rowset.template GetValue()}; + return true; + } + } + return false; + } + + template + static bool GetSpecialValueOpt(NIceDb::TNiceDb& db, EValueIds key, T& value) { + using TSource = std::conditional_t || std::is_enum_v, Value::Digit, Value::Bytes>; + + auto rowset = db.Table().Key((ui32)key).Select(); + if (rowset.IsReady()) { + if (rowset.IsValid()) { value = T{rowset.template GetValue()}; + } return true; } return false; @@ -548,26 +718,6 @@ struct Schema : NIceDb::Schema { } return true; } - - // Operations - static void Operations_Write(NIceDb::TNiceDb& db, const TWriteOperation& operation) { - TString metadata; - NKikimrTxColumnShard::TInternalOperationData proto; - operation.ToProto(proto); - Y_ABORT_UNLESS(proto.SerializeToString(&metadata)); - - db.Table().Key((ui64)operation.GetWriteId()).Update( - NIceDb::TUpdate((ui32)operation.GetStatus()), - NIceDb::TUpdate(operation.GetCreatedAt().Seconds()), - NIceDb::TUpdate(metadata), - NIceDb::TUpdate(operation.GetTxId()) - ); - } - - static void Operations_Erase(NIceDb::TNiceDb& db, const TWriteId writeId) { - db.Table().Key((ui64)writeId).Delete(); - } - }; } @@ -623,8 +773,8 @@ class TIndexChunkLoadContext { const ui32 RecordsCount; const ui32 RawBytes; public: - TIndexChunk BuildIndexChunk() const { - return TIndexChunk(Address.GetColumnId(), Address.GetChunkIdx(), RecordsCount, RawBytes, BlobRange); + TIndexChunk BuildIndexChunk(const TBlobRangeLink16::TLinkId blobLinkId) const { + return TIndexChunk(Address.GetColumnId(), Address.GetChunkIdx(), RecordsCount, RawBytes, BlobRange.BuildLink(blobLinkId)); } template diff --git 
a/ydb/core/tx/columnshard/columnshard_ttl.h b/ydb/core/tx/columnshard/columnshard_ttl.h index 77d401c64ade..de2378737e95 100644 --- a/ydb/core/tx/columnshard/columnshard_ttl.h +++ b/ydb/core/tx/columnshard/columnshard_ttl.h @@ -65,10 +65,13 @@ class TTtl { PathTtls.erase(pathId); } - void AddTtls(THashMap& eviction) const { + bool AddTtls(THashMap& eviction) const { for (auto& [pathId, descr] : PathTtls) { - eviction[pathId].Ttl = Convert(descr); + if (!eviction[pathId].Add(Convert(descr))) { + return false; + } } + return true; } const THashSet& TtlColumns() const { return Columns; } diff --git a/ydb/core/tx/columnshard/columnshard_ut_common.cpp b/ydb/core/tx/columnshard/columnshard_ut_common.cpp index 407aed03b43d..5e6794ff6cc3 100644 --- a/ydb/core/tx/columnshard/columnshard_ut_common.cpp +++ b/ydb/core/tx/columnshard/columnshard_ut_common.cpp @@ -1,11 +1,14 @@ #include "columnshard_ut_common.h" -#include "columnshard__stats_scan.h" #include "common/tests/shard_reader.h" +#include "engines/reader/sys_view/chunks/chunks.h" +#include #include #include #include +#include +#include #include namespace NKikimr::NTxUT { @@ -123,7 +126,7 @@ bool WriteDataImpl(TTestBasicRuntime& runtime, TActorId& sender, const ui64 shar } bool WriteData(TTestBasicRuntime& runtime, TActorId& sender, const ui64 shardId, const ui64 writeId, const ui64 tableId, const TString& data, - const std::vector>& ydbSchema, std::vector* writeIds) { + const std::vector& ydbSchema, std::vector* writeIds) { NLongTxService::TLongTxId longTxId; UNIT_ASSERT(longTxId.ParseString("ydb://long-tx/01ezvvxjdk2hd4vdgjs68knvp8?node_id=1")); return WriteDataImpl(runtime, sender, shardId, tableId, longTxId, writeId, data, NArrow::MakeArrowSchema(ydbSchema), writeIds); @@ -131,7 +134,7 @@ bool WriteData(TTestBasicRuntime& runtime, TActorId& sender, const ui64 shardId, } bool WriteData(TTestBasicRuntime& runtime, TActorId& sender, const ui64 writeId, const ui64 tableId, const TString& data, - const std::vector>& 
ydbSchema, bool waitResult, std::vector* writeIds) { + const std::vector& ydbSchema, bool waitResult, std::vector* writeIds) { NLongTxService::TLongTxId longTxId; UNIT_ASSERT(longTxId.ParseString("ydb://long-tx/01ezvvxjdk2hd4vdgjs68knvp8?node_id=1")); if (writeIds) { @@ -143,7 +146,7 @@ bool WriteData(TTestBasicRuntime& runtime, TActorId& sender, const ui64 writeId, std::optional WriteData(TTestBasicRuntime& runtime, TActorId& sender, const NLongTxService::TLongTxId& longTxId, ui64 tableId, const ui64 writePartId, const TString& data, - const std::vector>& ydbSchema) + const std::vector& ydbSchema) { auto write = std::make_unique(sender, longTxId, tableId, "0", data, writePartId); write->SetArrowSchema(NArrow::SerializeSchema(*NArrow::MakeArrowSchema(ydbSchema))); @@ -174,7 +177,7 @@ void ScanIndexStats(TTestBasicRuntime& runtime, TActorId& sender, const std::vec // Schema: pathId, kind, rows, bytes, rawBytes. PK: {pathId, kind} //record.SetSchemaVersion(0); - auto ydbSchema = PrimaryIndexStatsSchema; + auto ydbSchema = NOlap::NReader::NSysView::NChunks::TStatsIterator::StatsSchema; for (const auto& col : ydbSchema.Columns) { record.AddColumnTags(col.second.Id); auto columnType = NScheme::ProtoColumnTypeFromTypeInfoMod(col.second.PType, col.second.PTypeMod); @@ -295,18 +298,17 @@ std::vector MakeTestCells(const std::vector& types, ui32 value } -TString MakeTestBlob(std::pair range, const std::vector>& columns, +TString MakeTestBlob(std::pair range, const std::vector& columns, const TTestBlobOptions& options, const std::set& notNullColumns) { - TString err; NArrow::TArrowBatchBuilder batchBuilder(arrow::Compression::LZ4_FRAME, notNullColumns); - batchBuilder.Start(columns, 0, 0, err); - + const auto startStatus = batchBuilder.Start(NArrow::NTest::TTestColumn::ConvertToPairs(columns)); + UNIT_ASSERT_C(startStatus.ok(), startStatus.ToString()); std::vector nullPositions; std::vector samePositions; for (size_t i = 0; i < columns.size(); ++i) { - if 
(options.NullColumns.contains(columns[i].first)) { + if (options.NullColumns.contains(columns[i].GetName())) { nullPositions.push_back(i); - } else if (options.SameValueColumns.contains(columns[i].first)) { + } else if (options.SameValueColumns.contains(columns[i].GetName())) { samePositions.push_back(i); } } @@ -348,7 +350,7 @@ TString MakeTestBlob(std::pair range, const std::vector range, bool inclusiveFrom, bool inclusiveTo, - const std::vector>& columns) { + const std::vector& columns) { std::vector mem; std::vector types = TTestSchema::ExtractTypes(columns); std::vector cellsFrom = MakeTestCells(types, range.first, mem); @@ -392,31 +394,48 @@ NMetadata::NFetcher::ISnapshot::TPtr TTestSchema::BuildSnapshot(const TTableSpec } namespace NKikimr::NColumnShard { - NOlap::TIndexInfo BuildTableInfo(const std::vector>& ydbSchema, - const std::vector>& key) { + NOlap::TIndexInfo BuildTableInfo(const std::vector& ydbSchema, + const std::vector& key) { NOlap::TIndexInfo indexInfo = NOlap::TIndexInfo::BuildDefault(); for (ui32 i = 0; i < ydbSchema.size(); ++i) { ui32 id = i + 1; - auto& name = ydbSchema[i].first; - auto& type = ydbSchema[i].second; + auto& name = ydbSchema[i].GetName(); + auto& type = ydbSchema[i].GetType(); indexInfo.Columns[id] = NTable::TColumn(name, id, type, ""); indexInfo.ColumnNames[name] = id; } - for (const auto& [keyName, keyType] : key) { - indexInfo.KeyColumns.push_back(indexInfo.ColumnNames[keyName]); + for (const auto& c : key) { + indexInfo.KeyColumns.push_back(indexInfo.ColumnNames[c.GetName()]); } - indexInfo.SetAllKeys(); + auto storage = std::make_shared(); + storage->Initialize(); + indexInfo.SetAllKeys(NOlap::TTestStoragesManager::GetInstance()); return indexInfo; } + void SetupSchema(TTestBasicRuntime& runtime, TActorId& sender, const TString& txBody, const NOlap::TSnapshot& snapshot, bool succeed) { + + auto controller = NYDBTest::TControllers::GetControllerAs(); + while (controller && 
!controller->IsActiveTablet(TTestTxConfig::TxTablet0)) { + runtime.SimulateSleep(TDuration::Seconds(1)); + } + + using namespace NTxUT; + bool ok = ProposeSchemaTx(runtime, sender, txBody, snapshot); + UNIT_ASSERT_VALUES_EQUAL(ok, succeed); + if (succeed) { + PlanSchemaTx(runtime, sender, snapshot); + } + } + void SetupSchema(TTestBasicRuntime& runtime, TActorId& sender, ui64 pathId, const TestTableDescription& table, TString codec) { using namespace NTxUT; - NOlap::TSnapshot snap(10, 10); + NOlap::TSnapshot snapshot(10, 10); TString txBody; auto specials = TTestSchema::TTableSpecials().WithCodec(codec); if (table.InStore) { @@ -424,13 +443,11 @@ namespace NKikimr::NColumnShard { } else { txBody = TTestSchema::CreateStandaloneTableTxBody(pathId, table.Schema, table.Pk, specials); } - bool ok = ProposeSchemaTx(runtime, sender, txBody, snap); - UNIT_ASSERT(ok); - - PlanSchemaTx(runtime, sender, snap); + SetupSchema(runtime, sender, txBody, snapshot, true); } - void PrepareTablet(TTestBasicRuntime& runtime, const ui64 tableId, const std::vector>& schema, const ui32 keySize) { + + void PrepareTablet(TTestBasicRuntime& runtime, const ui64 tableId, const std::vector& schema, const ui32 keySize) { using namespace NTxUT; CreateTestBootstrapper(runtime, CreateTestTabletInfo(TTestTxConfig::TxTablet0, TTabletTypes::ColumnShard), &CreateColumnShard); @@ -449,10 +466,22 @@ namespace NKikimr::NColumnShard { SetupSchema(runtime, sender, tableId, tableDescription); } - std::shared_ptr ReadAllAsBatch(TTestBasicRuntime& runtime, const ui64 tableId, const NOlap::TSnapshot& snapshot, const std::vector>& schema) { + void PrepareTablet(TTestBasicRuntime& runtime, const TString& schemaTxBody, bool succeed) { + using namespace NTxUT; + CreateTestBootstrapper(runtime, CreateTestTabletInfo(TTestTxConfig::TxTablet0, TTabletTypes::ColumnShard), &CreateColumnShard); + + TDispatchOptions options; + options.FinalEvents.push_back(TDispatchOptions::TFinalEventCondition(TEvTablet::EvBoot)); + 
runtime.DispatchEvents(options); + + TActorId sender = runtime.AllocateEdgeActor(); + SetupSchema(runtime, sender, schemaTxBody, NOlap::TSnapshot(1000, 100), succeed); + } + + std::shared_ptr ReadAllAsBatch(TTestBasicRuntime& runtime, const ui64 tableId, const NOlap::TSnapshot& snapshot, const std::vector& schema) { std::vector fields; for (auto&& f : schema) { - fields.emplace_back(f.first); + fields.emplace_back(f.GetName()); } NOlap::NTests::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, snapshot); diff --git a/ydb/core/tx/columnshard/columnshard_ut_common.h b/ydb/core/tx/columnshard/columnshard_ut_common.h index 58849aa342e1..e5bbe8d10997 100644 --- a/ydb/core/tx/columnshard/columnshard_ut_common.h +++ b/ydb/core/tx/columnshard/columnshard_ut_common.h @@ -3,15 +3,16 @@ #include "columnshard.h" #include "columnshard_impl.h" #include "blob_cache.h" +#include "engines/scheme/statistics/max/operator.h" #include +#include #include #include #include #include #include - namespace NKikimr::NTxUT { // Private events of different actors reuse the same ES_PRIVATE range @@ -50,6 +51,10 @@ struct TTestSchema { : Name(name) {} + TString DebugString() const { + return TStringBuilder() << "{Column=" << TtlColumn << ";EvictAfter=" << EvictAfter.value_or(TDuration::Zero()) << ";Name=" << Name << ";Codec=" << Codec << "};"; + } + NKikimrSchemeOp::EColumnCodec GetCodecId() const { if (Codec == "none") { return NKikimrSchemeOp::EColumnCodec::ColumnCodecPlain; @@ -95,6 +100,7 @@ struct TTestSchema { s3Config.SetProxyScheme(NKikimrSchemeOp::TS3Settings::HTTP); #else s3Config.SetEndpoint("fake"); + s3Config.SetSecretKey("fakeSecret"); #endif s3Config.SetRequestTimeoutMs(10000); s3Config.SetHttpRequestTimeoutMs(10000); @@ -104,11 +110,22 @@ struct TTestSchema { }; struct TTableSpecials : public TStorageTier { + private: + bool NeedTestStatisticsFlag = true; + public: std::vector Tiers; bool WaitEmptyAfter = false; TTableSpecials() noexcept = default; + bool 
NeedTestStatistics() const { + return NeedTestStatisticsFlag; + } + + void SetNeedTestStatistics(const bool value) { + NeedTestStatisticsFlag = value; + } + bool HasTiers() const { return !Tiers.empty(); } @@ -127,107 +144,110 @@ struct TTestSchema { EvictAfter = ttl; return *this; } - }; - static auto YdbSchema(const std::pair& firstKeyItem = {"timestamp", TTypeInfo(NTypeIds::Timestamp) }) { - std::vector> schema = { + TString DebugString() const { + auto result = TStringBuilder() << "WaitEmptyAfter=" << WaitEmptyAfter << ";Tiers="; + for (auto&& tier : Tiers) { + result << "{" << tier.DebugString() << "}"; + } + result << ";TTL=" << TStorageTier::DebugString(); + return result; + } + }; + using TTestColumn = NArrow::NTest::TTestColumn; + static auto YdbSchema(const TTestColumn& firstKeyItem = TTestColumn("timestamp", TTypeInfo(NTypeIds::Timestamp))) { + std::vector schema = { // PK firstKeyItem, - {"resource_type", TTypeInfo(NTypeIds::Utf8) }, - {"resource_id", TTypeInfo(NTypeIds::Utf8) }, - {"uid", TTypeInfo(NTypeIds::Utf8) }, - {"level", TTypeInfo(NTypeIds::Int32) }, - {"message", TTypeInfo(NTypeIds::Utf8) }, - {"json_payload", TTypeInfo(NTypeIds::Json) }, - {"ingested_at", TTypeInfo(NTypeIds::Timestamp) }, - {"saved_at", TTypeInfo(NTypeIds::Timestamp) }, - {"request_id", TTypeInfo(NTypeIds::Utf8) } + TTestColumn("resource_type", TTypeInfo(NTypeIds::Utf8) ), + TTestColumn("resource_id", TTypeInfo(NTypeIds::Utf8) ), + TTestColumn("uid", TTypeInfo(NTypeIds::Utf8) ).SetStorageId("__MEMORY"), + TTestColumn("level", TTypeInfo(NTypeIds::Int32) ), + TTestColumn("message", TTypeInfo(NTypeIds::Utf8) ).SetStorageId("__MEMORY"), + TTestColumn("json_payload", TTypeInfo(NTypeIds::Json) ), + TTestColumn("ingested_at", TTypeInfo(NTypeIds::Timestamp) ), + TTestColumn("saved_at", TTypeInfo(NTypeIds::Timestamp) ), + TTestColumn("request_id", TTypeInfo(NTypeIds::Utf8) ) }; return schema; }; static auto YdbExoticSchema() { - std::vector> schema = { + std::vector schema = { // PK - 
{"timestamp", TTypeInfo(NTypeIds::Timestamp) }, - {"resource_type", TTypeInfo(NTypeIds::Utf8) }, - {"resource_id", TTypeInfo(NTypeIds::Utf8) }, - {"uid", TTypeInfo(NTypeIds::Utf8) }, + TTestColumn("timestamp", TTypeInfo(NTypeIds::Timestamp) ), + TTestColumn("resource_type", TTypeInfo(NTypeIds::Utf8) ), + TTestColumn("resource_id", TTypeInfo(NTypeIds::Utf8) ), + TTestColumn("uid", TTypeInfo(NTypeIds::Utf8) ).SetStorageId("__MEMORY"), // - {"level", TTypeInfo(NTypeIds::Int32) }, - {"message", TTypeInfo(NTypeIds::String4k) }, - {"json_payload", TTypeInfo(NTypeIds::JsonDocument) }, - {"ingested_at", TTypeInfo(NTypeIds::Timestamp) }, - {"saved_at", TTypeInfo(NTypeIds::Timestamp) }, - {"request_id", TTypeInfo(NTypeIds::Yson) } + TTestColumn("level", TTypeInfo(NTypeIds::Int32) ), + TTestColumn("message", TTypeInfo(NTypeIds::String4k) ).SetStorageId("__MEMORY"), + TTestColumn("json_payload", TTypeInfo(NTypeIds::JsonDocument) ), + TTestColumn("ingested_at", TTypeInfo(NTypeIds::Timestamp) ), + TTestColumn("saved_at", TTypeInfo(NTypeIds::Timestamp) ), + TTestColumn("request_id", TTypeInfo(NTypeIds::Yson) ) }; return schema; }; static auto YdbPkSchema() { - std::vector> schema = { - {"timestamp", TTypeInfo(NTypeIds::Timestamp) }, - {"resource_type", TTypeInfo(NTypeIds::Utf8) }, - {"resource_id", TTypeInfo(NTypeIds::Utf8) }, - {"uid", TTypeInfo(NTypeIds::Utf8) } + std::vector schema = { + TTestColumn("timestamp", TTypeInfo(NTypeIds::Timestamp) ), + TTestColumn("resource_type", TTypeInfo(NTypeIds::Utf8) ).SetStorageId("__MEMORY"), + TTestColumn("resource_id", TTypeInfo(NTypeIds::Utf8) ), + TTestColumn("uid", TTypeInfo(NTypeIds::Utf8) ).SetStorageId("__MEMORY") }; return schema; } static auto YdbAllTypesSchema() { - std::vector> schema = { - { "ts", TTypeInfo(NTypeIds::Timestamp) }, - - { "i8", TTypeInfo(NTypeIds::Int8) }, - { "i16", TTypeInfo(NTypeIds::Int16) }, - { "i32", TTypeInfo(NTypeIds::Int32) }, - { "i64", TTypeInfo(NTypeIds::Int64) }, - { "u8", TTypeInfo(NTypeIds::Uint8) 
}, - { "u16", TTypeInfo(NTypeIds::Uint16) }, - { "u32", TTypeInfo(NTypeIds::Uint32) }, - { "u64", TTypeInfo(NTypeIds::Uint64) }, - { "float", TTypeInfo(NTypeIds::Float) }, - { "double", TTypeInfo(NTypeIds::Double) }, - - { "byte", TTypeInfo(NTypeIds::Byte) }, + std::vector schema = { + TTestColumn("ts", TTypeInfo(NTypeIds::Timestamp) ), + + TTestColumn( "i8", TTypeInfo(NTypeIds::Int8) ), + TTestColumn( "i16", TTypeInfo(NTypeIds::Int16) ), + TTestColumn( "i32", TTypeInfo(NTypeIds::Int32) ), + TTestColumn( "i64", TTypeInfo(NTypeIds::Int64) ), + TTestColumn( "u8", TTypeInfo(NTypeIds::Uint8) ), + TTestColumn( "u16", TTypeInfo(NTypeIds::Uint16) ), + TTestColumn( "u32", TTypeInfo(NTypeIds::Uint32) ), + TTestColumn( "u64", TTypeInfo(NTypeIds::Uint64) ), + TTestColumn( "float", TTypeInfo(NTypeIds::Float) ), + TTestColumn( "double", TTypeInfo(NTypeIds::Double) ), + + TTestColumn("byte", TTypeInfo(NTypeIds::Byte) ), //{ "bool", TTypeInfo(NTypeIds::Bool) }, //{ "decimal", TTypeInfo(NTypeIds::Decimal) }, //{ "dynum", TTypeInfo(NTypeIds::DyNumber) }, - { "date", TTypeInfo(NTypeIds::Date) }, - { "datetime", TTypeInfo(NTypeIds::Datetime) }, + TTestColumn( "date", TTypeInfo(NTypeIds::Date) ), + TTestColumn( "datetime", TTypeInfo(NTypeIds::Datetime) ), //{ "interval", TTypeInfo(NTypeIds::Interval) }, - {"text", TTypeInfo(NTypeIds::Text) }, - {"bytes", TTypeInfo(NTypeIds::Bytes) }, - {"yson", TTypeInfo(NTypeIds::Yson) }, - {"json", TTypeInfo(NTypeIds::Json) }, - {"jsondoc", TTypeInfo(NTypeIds::JsonDocument) } + TTestColumn("text", TTypeInfo(NTypeIds::Text) ), + TTestColumn("bytes", TTypeInfo(NTypeIds::Bytes) ), + TTestColumn("yson", TTypeInfo(NTypeIds::Yson) ), + TTestColumn("json", TTypeInfo(NTypeIds::Json) ), + TTestColumn("jsondoc", TTypeInfo(NTypeIds::JsonDocument) ) }; return schema; }; - static NKikimrSchemeOp::TOlapColumnDescription CreateColumn(ui32 id, const TString& name, TTypeInfo type) { - NKikimrSchemeOp::TOlapColumnDescription col; - col.SetId(id); - col.SetName(name); 
- auto columnType = NScheme::ProtoColumnTypeFromTypeInfoMod(type, ""); - col.SetTypeId(columnType.TypeId); - if (columnType.TypeInfo) { - *col.MutableTypeInfo() = *columnType.TypeInfo; - } - return col; - } - - static void InitSchema(const std::vector>& columns, - const std::vector>& pk, + static void InitSchema(const std::vector& columns, + const std::vector& pk, const TTableSpecials& specials, NKikimrSchemeOp::TColumnTableSchema* schema) { schema->SetEngine(NKikimrSchemeOp::COLUMN_ENGINE_REPLACING_TIMESERIES); for (ui32 i = 0; i < columns.size(); ++i) { - *schema->MutableColumns()->Add() = CreateColumn(i + 1, columns[i].first, columns[i].second); + *schema->MutableColumns()->Add() = columns[i].CreateColumn(i + 1); + if (!specials.NeedTestStatistics()) { + continue; + } + if (NOlap::NStatistics::NMax::TOperator::IsAvailableType(columns[i].GetType())) { + *schema->AddStatistics() = NOlap::NStatistics::TOperatorContainer("MAX::" + columns[i].GetName(), std::make_shared(i + 1)).SerializeToProto(); + } } Y_ABORT_UNLESS(pk.size() > 0); @@ -261,12 +281,13 @@ struct TTestSchema { return specials.HasTiers() || specials.HasTtl(); } - static TString CreateTableTxBody(ui64 pathId, const std::vector>& columns, - const std::vector>& pk, - const TTableSpecials& specialsExt = {}) + static TString CreateTableTxBody(ui64 pathId, const std::vector& columns, + const std::vector& pk, + const TTableSpecials& specialsExt = {}, ui64 generation = 0) { auto specials = specialsExt; NKikimrTxColumnShard::TSchemaTxBody tx; + tx.MutableSeqNo()->SetGeneration(generation); auto* table = tx.MutableEnsureTables()->AddTables(); table->SetPathId(pathId); @@ -288,8 +309,8 @@ struct TTestSchema { return out; } - static TString CreateInitShardTxBody(ui64 pathId, const std::vector>& columns, - const std::vector>& pk, + static TString CreateInitShardTxBody(ui64 pathId, const std::vector& columns, + const std::vector& pk, const TTableSpecials& specials = {}, const TString& ownerPath = "/Root/olap") { 
NKikimrTxColumnShard::TSchemaTxBody tx; auto* table = tx.MutableInitShard()->AddTables(); @@ -306,8 +327,8 @@ struct TTestSchema { return out; } - static TString CreateStandaloneTableTxBody(ui64 pathId, const std::vector>& columns, - const std::vector>& pk, + static TString CreateStandaloneTableTxBody(ui64 pathId, const std::vector& columns, + const std::vector& pk, const TTableSpecials& specials = {}) { NKikimrTxColumnShard::TSchemaTxBody tx; auto* table = tx.MutableEnsureTables()->AddTables(); @@ -377,20 +398,20 @@ struct TTestSchema { return txBody; } - static std::vector ExtractNames(const std::vector>& columns) { + static std::vector ExtractNames(const std::vector& columns) { std::vector out; out.reserve(columns.size()); for (auto& col : columns) { - out.push_back(col.first); + out.push_back(col.GetName()); } return out; } - static std::vector ExtractTypes(const std::vector>& columns) { + static std::vector ExtractTypes(const std::vector& columns) { std::vector types; types.reserve(columns.size()); - for (auto& [name, type] : columns) { - types.push_back(type); + for (auto& i : columns) { + types.push_back(i.GetType()); } return types; } @@ -403,14 +424,14 @@ void PlanSchemaTx(TTestBasicRuntime& runtime, TActorId& sender, NOlap::TSnapshot void PlanWriteTx(TTestBasicRuntime& runtime, TActorId& sender, NOlap::TSnapshot snap, bool waitResult = true); bool WriteData(TTestBasicRuntime& runtime, TActorId& sender, const ui64 shardId, const ui64 writeId, const ui64 tableId, const TString& data, - const std::vector>& ydbSchema, std::vector* writeIds); + const std::vector& ydbSchema, std::vector* writeIds); bool WriteData(TTestBasicRuntime& runtime, TActorId& sender, const ui64 writeId, const ui64 tableId, const TString& data, - const std::vector>& ydbSchema, bool waitResult = true, std::vector* writeIds = nullptr); + const std::vector& ydbSchema, bool waitResult = true, std::vector* writeIds = nullptr); std::optional WriteData(TTestBasicRuntime& runtime, TActorId& 
sender, const NLongTxService::TLongTxId& longTxId, ui64 tableId, const ui64 writePartId, const TString& data, - const std::vector>& ydbSchema); + const std::vector& ydbSchema); ui32 WaitWriteResult(TTestBasicRuntime& runtime, ui64 shardId, std::vector* writeIds = nullptr); @@ -436,10 +457,10 @@ struct TTestBlobOptions { }; TCell MakeTestCell(const TTypeInfo& typeInfo, ui32 value, std::vector& mem); -TString MakeTestBlob(std::pair range, const std::vector>& columns, +TString MakeTestBlob(std::pair range, const std::vector& columns, const TTestBlobOptions& options = {}, const std::set& notNullColumns = {}); TSerializedTableRange MakeTestRange(std::pair range, bool inclusiveFrom, bool inclusiveTo, - const std::vector>& columns); + const std::vector& columns); } @@ -508,6 +529,13 @@ namespace NKikimr::NColumnShard { Y_ABORT_UNLESS(Builders.size() == schema->fields().size()); } + TTableUpdatesBuilder(arrow::Result> schema) { + UNIT_ASSERT_C(schema.ok(), schema.status().ToString()); + Schema = schema.ValueUnsafe(); + Builders = NArrow::MakeBuilders(Schema); + Y_ABORT_UNLESS(Builders.size() == Schema->fields().size()); + } + TRowBuilder AddRow() { ++RowsCount; return TRowBuilder(0, *this); @@ -525,20 +553,22 @@ namespace NKikimr::NColumnShard { } }; - NOlap::TIndexInfo BuildTableInfo(const std::vector>& ydbSchema, - const std::vector>& key); + NOlap::TIndexInfo BuildTableInfo(const std::vector& ydbSchema, + const std::vector& key); struct TestTableDescription { - std::vector> Schema = NTxUT::TTestSchema::YdbSchema(); - std::vector> Pk = NTxUT::TTestSchema::YdbPkSchema(); + std::vector Schema = NTxUT::TTestSchema::YdbSchema(); + std::vector Pk = NTxUT::TTestSchema::YdbPkSchema(); bool InStore = true; }; void SetupSchema(TTestBasicRuntime& runtime, TActorId& sender, ui64 pathId, const TestTableDescription& table = {}, TString codec = "none"); + void SetupSchema(TTestBasicRuntime& runtime, TActorId& sender, const TString& txBody, const NOlap::TSnapshot& snapshot, bool 
succeed = true); - void PrepareTablet(TTestBasicRuntime& runtime, const ui64 tableId, const std::vector>& schema, const ui32 keySize = 1); + void PrepareTablet(TTestBasicRuntime& runtime, const ui64 tableId, const std::vector& schema, const ui32 keySize = 1); + void PrepareTablet(TTestBasicRuntime& runtime, const TString& schemaTxBody, bool succeed); - std::shared_ptr ReadAllAsBatch(TTestBasicRuntime& runtime, const ui64 tableId, const NOlap::TSnapshot& snapshot, const std::vector>& schema); + std::shared_ptr ReadAllAsBatch(TTestBasicRuntime& runtime, const ui64 tableId, const NOlap::TSnapshot& snapshot, const std::vector& schema); } diff --git a/ydb/core/tx/columnshard/common/blob.cpp b/ydb/core/tx/columnshard/common/blob.cpp new file mode 100644 index 000000000000..6bcf397e339a --- /dev/null +++ b/ydb/core/tx/columnshard/common/blob.cpp @@ -0,0 +1,173 @@ +#include "blob.h" +#include +#include + +#include + +namespace NKikimr::NOlap { + +namespace { + +#define PARSE_INT_COMPONENT(fieldType, fieldName, endChar) \ + if (pos >= endPos) { \ + error = "Failed to parse " #fieldName " component"; \ + return TUnifiedBlobId(); \ + } \ + fieldType fieldName = -1; \ + { \ + auto [ptr, ec] { std::from_chars(str + pos, str + endPos, fieldName) }; \ + if (ec != std::errc()) { \ + error = "Failed to parse " #fieldName " component"; \ + return TUnifiedBlobId(); \ + } else { \ + pos = ptr - str; \ + } \ + if (str[pos++] != endChar) { \ + error = #endChar " not found after " #fieldName; \ + return TUnifiedBlobId(); \ + } \ + } + +// Format: "DS:group:logoBlobId" +// Example: "DS:2181038103:[72075186224038245:51:31595:2:0:11952:0]" +TUnifiedBlobId ParseExtendedDsBlobId(const TString& s, TString& error) { + Y_ABORT_UNLESS(s.size() > 2); + const char* str = s.c_str(); + Y_ABORT_UNLESS(str[0] == 'D' && str[1] == 'S'); + i64 pos = 2; + i64 endPos = s.size(); + if (str[pos++] != ':') { + error = "Starting ':' not found"; + return TUnifiedBlobId(); + } + + PARSE_INT_COMPONENT(ui32, 
dsGroup, ':'); + + TLogoBlobID logoBlobId; + if (!TLogoBlobID::Parse(logoBlobId, s.substr(pos), error)) { + return TUnifiedBlobId(); + } + + return TUnifiedBlobId(dsGroup, logoBlobId); +} + +} + +TUnifiedBlobId TUnifiedBlobId::ParseFromString(const TString& str, + const IBlobGroupSelector* dsGroupSelector, TString& error) { + if (str.size() <= 2) { + error = TStringBuilder() << "Wrong blob id: '" << str << "'"; + return TUnifiedBlobId(); + } + + if (str[0] == '[') { + // If blobId starts with '[' this must be a logoblobId and if channel is set to FAKE_CHANNEL + // this is a fake logoblobid used for small blob + TLogoBlobID logoBlobId; + bool parsed = TLogoBlobID::Parse(logoBlobId, str, error); + if (!parsed) { + error = "Cannot parse TLogoBlobID: " + error; + return TUnifiedBlobId(); + } + // DS blob + if (!dsGroupSelector) { + error = "Need TBlobGroupSelector to resolve DS group for the blob"; + return TUnifiedBlobId(); + } + return TUnifiedBlobId(dsGroupSelector->GetGroup(logoBlobId), logoBlobId); + } else if (str[0] == 'D' && str[1] == 'S') { + return ParseExtendedDsBlobId(str, error); + } + + error = TStringBuilder() << "Wrong blob id: '" << str << "'"; + return TUnifiedBlobId(); +} + +NKikimr::TConclusionStatus TUnifiedBlobId::DeserializeFromProto(const NKikimrColumnShardProto::TUnifiedBlobId& proto) { + Id.DsGroup = proto.GetDsGroup(); + TStringBuf sb(proto.GetBlobId().data(), proto.GetBlobId().size()); + Id.BlobId = TLogoBlobID::FromBinary(sb); + return TConclusionStatus::Success(); +} + +NKikimr::TConclusion TUnifiedBlobId::BuildFromProto(const NKikimrColumnShardProto::TUnifiedBlobId& proto) { + TUnifiedBlobId result; + auto parse = result.DeserializeFromProto(proto); + if (!parse) { + return parse; + } + return result; +} + +NKikimrColumnShardProto::TUnifiedBlobId TUnifiedBlobId::SerializeToProto() const { + NKikimrColumnShardProto::TUnifiedBlobId result; + result.SetDsGroup(Id.DsGroup); + result.SetBlobId(Id.BlobId.AsBinaryString()); + return result; +} + 
+NKikimr::TConclusionStatus TBlobRange::DeserializeFromProto(const NKikimrColumnShardProto::TBlobRange& proto) { + auto parsed = TUnifiedBlobId::BuildFromString(proto.GetBlobId(), nullptr); + if (!parsed) { + return parsed; + } + BlobId = parsed.DetachResult(); + + Offset = proto.GetOffset(); + Size = proto.GetSize(); + return TConclusionStatus::Success(); +} + +NKikimr::TConclusion TBlobRange::BuildFromProto(const NKikimrColumnShardProto::TBlobRange& proto) { + TBlobRange result; + auto parsed = result.DeserializeFromProto(proto); + if (!parsed) { + return parsed; + } else { + return result; + } +} + +NKikimrColumnShardProto::TBlobRange TBlobRange::SerializeToProto() const { + NKikimrColumnShardProto::TBlobRange result; + result.SetBlobId(BlobId.ToStringNew()); + result.SetOffset(Offset); + result.SetSize(Size); + return result; +} + +NKikimr::TConclusionStatus TBlobRangeLink16::DeserializeFromProto(const NKikimrColumnShardProto::TBlobRangeLink16& proto) { + BlobIdx = proto.GetBlobIdx(); + Offset = proto.GetOffset(); + Size = proto.GetSize(); + return TConclusionStatus::Success(); +} + +NKikimr::TConclusion TBlobRangeLink16::BuildFromProto(const NKikimrColumnShardProto::TBlobRangeLink16& proto) { + TBlobRangeLink16 result; + auto parsed = result.DeserializeFromProto(proto); + if (!parsed) { + return parsed; + } else { + return result; + } +} + +NKikimrColumnShardProto::TBlobRangeLink16 TBlobRangeLink16::SerializeToProto() const { + NKikimrColumnShardProto::TBlobRangeLink16 result; + result.SetBlobIdx(GetBlobIdxVerified()); + result.SetOffset(Offset); + result.SetSize(Size); + return result; +} + +ui16 TBlobRangeLink16::GetBlobIdxVerified() const { + AFL_VERIFY(BlobIdx); + return *BlobIdx; +} + +NKikimr::NOlap::TBlobRange TBlobRangeLink16::RestoreRange(const TUnifiedBlobId& blobId) const { + return TBlobRange(blobId, Offset, Size); +} + +} diff --git a/ydb/core/tx/columnshard/common/blob.h b/ydb/core/tx/columnshard/common/blob.h new file mode 100644 index 
000000000000..d7deea1df72d --- /dev/null +++ b/ydb/core/tx/columnshard/common/blob.h @@ -0,0 +1,410 @@ +#pragma once + +#include +#include + +#include + +namespace NKikimrColumnShardProto { +class TBlobRange; +class TBlobRangeLink16; +class TUnifiedBlobId; +} + +namespace NKikimr::NOlap { + +class IBlobGroupSelector { +protected: + virtual ~IBlobGroupSelector() = default; + +public: + virtual ui32 GetGroup(const TLogoBlobID& blobId) const = 0; +}; + +class TUnifiedBlobId; + +class TUnifiedBlobId { + // Id of a blob in YDB distributed storage + struct TDsBlobId { + TLogoBlobID BlobId; + ui32 DsGroup; + + bool operator == (const TDsBlobId& other) const { + return BlobId == other.BlobId && DsGroup == other.DsGroup; + } + + TString ToStringNew() const { + return Sprintf( "DS:%" PRIu32 ":%s", DsGroup, BlobId.ToString().c_str()); + } + + TString ToStringLegacy() const { + return BlobId.ToString(); + } + + ui64 Hash() const { + return CombineHashes(BlobId.Hash(), IntHash(DsGroup)); + } + }; + + TDsBlobId Id; + +public: + TUnifiedBlobId() = default; + + // Initialize as DS blob Id + TUnifiedBlobId(ui32 dsGroup, const TLogoBlobID& logoBlobId) + : Id(TDsBlobId{logoBlobId, dsGroup}) + {} + + // Initialize as Small blob Id + TUnifiedBlobId(ui64 tabletId, ui32 gen, ui32 step, ui32 cookie, ui32 channel, const ui32 groupId, ui32 size) + : Id(TDsBlobId{TLogoBlobID(tabletId, gen, step, channel, size, cookie), groupId}) + {} + + TUnifiedBlobId(const TUnifiedBlobId& other) = default; + TUnifiedBlobId& operator = (const TUnifiedBlobId& logoBlobId) = default; + TUnifiedBlobId(TUnifiedBlobId&& other) = default; + TUnifiedBlobId& operator = (TUnifiedBlobId&& logoBlobId) = default; + + static TUnifiedBlobId BuildRaw(const ui32 groupId, const ui64 tabletId, const ui64 r1, const ui64 r2) { + return TUnifiedBlobId(groupId, TLogoBlobID(tabletId, r1, r2)); + } + + NKikimrColumnShardProto::TUnifiedBlobId SerializeToProto() const; + + TConclusionStatus DeserializeFromProto(const 
NKikimrColumnShardProto::TUnifiedBlobId& proto); + + static TConclusion BuildFromProto(const NKikimrColumnShardProto::TUnifiedBlobId& proto); + + static TConclusion BuildFromString(const TString& id, const IBlobGroupSelector* dsGroupSelector) { + TString error; + TUnifiedBlobId result = ParseFromString(id, dsGroupSelector, error); + if (!result.IsValid()) { + return TConclusionStatus::Fail(error); + } + return result; + } + + static TUnifiedBlobId ParseFromString(const TString& str, + const IBlobGroupSelector* dsGroupSelector, TString& error); + + bool operator == (const TUnifiedBlobId& other) const { + return Id == other.Id; + } + + bool IsValid() const { + return Id.BlobId.IsValid(); + } + + size_t BlobSize() const { + return Id.BlobId.BlobSize(); + } + + TLogoBlobID GetLogoBlobId() const { + return Id.BlobId; + } + + ui32 GetDsGroup() const { + return Id.DsGroup; + } + + ui64 GetTabletId() const { + return Id.BlobId.TabletID(); + } + + ui64 Hash() const noexcept { + return Id.Hash(); + } + + // This is only implemented for DS for backward compatibility with persisted data. + // All new functionality should rahter use string blob id representation + TString SerializeBinary() const { + return TString((const char*)GetLogoBlobId().GetRaw(), sizeof(TLogoBlobID)); + } + + TString ToStringLegacy() const { + return Id.ToStringLegacy(); + } + + TString ToStringNew() const { + return Id.ToStringNew(); + } +}; + + +// Describes a range of bytes in a blob. It is used for read requests and for caching. 
+struct TBlobRange; +class TBlobRangeLink16 { +public: + using TLinkId = ui16; + + std::optional BlobIdx; + ui32 Offset; + ui32 Size; + + TBlobRangeLink16() = default; + + ui32 GetSize() const { + return Size; + } + + ui32 GetOffset() const { + return Offset; + } + + explicit TBlobRangeLink16(ui32 offset, ui32 size) + : Offset(offset) + , Size(size) { + } + + explicit TBlobRangeLink16(const ui16 blobIdx, ui32 offset, ui32 size) + : BlobIdx(blobIdx) + , Offset(offset) + , Size(size) { + } + + ui16 GetBlobIdxVerified() const; + + bool IsValid() const { + return !!BlobIdx; + } + + NKikimrColumnShardProto::TBlobRangeLink16 SerializeToProto() const; + TConclusionStatus DeserializeFromProto(const NKikimrColumnShardProto::TBlobRangeLink16& proto); + static TConclusion BuildFromProto(const NKikimrColumnShardProto::TBlobRangeLink16& proto); + TString ToString() const { + TStringBuilder result; + result << "["; + if (BlobIdx) { + result << *BlobIdx; + } else { + result << "NO_BLOB"; + } + return result << ":" << Offset << ":" << Size << "]"; + } + + TBlobRange RestoreRange(const TUnifiedBlobId& blobId) const; +}; + +struct TBlobRange { + TUnifiedBlobId BlobId; + ui32 Offset; + ui32 Size; + + bool operator<(const TBlobRange& br) const { + if (BlobId != br.BlobId) { + return BlobId.GetLogoBlobId().Compare(br.BlobId.GetLogoBlobId()) < 0; + } else if (Offset != br.Offset) { + return Offset < br.Offset; + } else { + return Size < br.Size; + } + } + + const TUnifiedBlobId& GetBlobId() const { + return BlobId; + } + + bool IsNextRangeFor(const TBlobRange& br) const { + return BlobId == br.BlobId && br.Offset + br.Size == Offset; + } + + bool TryGlueSameBlob(const TBlobRange& br, const ui64 limit) { + if (GetBlobId() != br.GetBlobId()) { + return false; + } + const ui32 right = std::max(Offset + Size, br.Offset + br.Size); + const ui32 offset = std::min(Offset, br.Offset); + const ui32 size = right - offset; + if (size > limit) { + return false; + } + Size = size; + Offset = offset; 
+ return true; + } + + bool TryGlueWithNext(const TBlobRange& br) { + if (!br.IsNextRangeFor(*this)) { + return false; + } + Size += br.Size; + return true; + } + + TBlobRangeLink16 BuildLink(const TBlobRangeLink16::TLinkId idx) const { + return TBlobRangeLink16(idx, Offset, Size); + } + + TBlobRangeLink16 IncorrectLink() const { + return TBlobRangeLink16(Offset, Size); + } + + bool IsValid() const { + return BlobId.IsValid() && Size && Offset + Size <= BlobId.BlobSize(); + } + + ui32 GetBlobSize() const { + return Size; + } + + bool IsFullBlob() const { + return Size == BlobId.BlobSize(); + } + + explicit TBlobRange(const TUnifiedBlobId& blobId = TUnifiedBlobId(), ui32 offset = 0, ui32 size = 0) + : BlobId(blobId) + , Offset(offset) + , Size(size) + { + if (Size > 0) { + Y_ABORT_UNLESS(Offset < BlobId.BlobSize()); + Y_ABORT_UNLESS(Offset + Size <= BlobId.BlobSize()); + } + } + + static TBlobRange FromBlobId(const TUnifiedBlobId& blobId) { + return TBlobRange(blobId, 0, blobId.BlobSize()); + } + + bool operator == (const TBlobRange& other) const { + return + BlobId == other.BlobId && + Offset == other.Offset && + Size == other.Size; + } + + ui64 Hash() const noexcept { + ui64 hash = BlobId.Hash(); + hash = CombineHashes(hash, IntHash(Offset)); + hash = CombineHashes(hash, IntHash(Size)); + return hash; + } + + TString ToString() const { + return Sprintf("{ Blob: %s Offset: %" PRIu32 " Size: %" PRIu32 " }", + BlobId.ToStringNew().c_str(), Offset, Size); + } + + NKikimrColumnShardProto::TBlobRange SerializeToProto() const; + + TConclusionStatus DeserializeFromProto(const NKikimrColumnShardProto::TBlobRange& proto); + static TConclusion BuildFromProto(const NKikimrColumnShardProto::TBlobRange& proto); +}; + +class IBlobInUseTracker { +private: + virtual bool DoFreeBlob(const NOlap::TUnifiedBlobId& blobId) = 0; + virtual bool DoUseBlob(const NOlap::TUnifiedBlobId& blobId) = 0; +public: + virtual ~IBlobInUseTracker() = default; + + bool FreeBlob(const 
NOlap::TUnifiedBlobId& blobId) { + return DoFreeBlob(blobId); + } + bool UseBlob(const NOlap::TUnifiedBlobId& blobId) { + return DoUseBlob(blobId); + } + + virtual bool IsBlobInUsage(const NOlap::TUnifiedBlobId& blobId) const = 0; +}; + +// Expected blob lifecycle: EVICTING -> SELF_CACHED -> EXTERN <-> CACHED +enum class EEvictState : ui8 { + UNKNOWN = 0, + EVICTING = 1, // source, extern, cached blobs: 1-- + SELF_CACHED = 2, // source, extern, cached blobs: 11- + EXTERN = 3, // source, extern, cached blobs: -1- + CACHED = 4, // source, extern, cached blobs: -11 + ERASING = 5, // source, extern, cached blobs: -?? + //ERASED = 6, // source, extern, cached blobs: --- +}; + +inline bool IsExported(EEvictState state) { + return state == EEvictState::SELF_CACHED || + state == EEvictState::EXTERN || + state == EEvictState::CACHED; +} + +inline bool CouldBeExported(EEvictState state) { + return state == EEvictState::SELF_CACHED || + state == EEvictState::EXTERN || + state == EEvictState::CACHED || + state == EEvictState::ERASING; +} + +inline bool IsDeleted(EEvictState state) { + return ui8(state) >= ui8(EEvictState::EXTERN); // !EVICTING and !SELF_CACHED +} + +struct TEvictedBlob { + EEvictState State = EEvictState::UNKNOWN; + TUnifiedBlobId Blob; + TUnifiedBlobId ExternBlob; + TUnifiedBlobId CachedBlob; + + bool operator == (const TEvictedBlob& other) const { + return Blob == other.Blob; + } + + ui64 Hash() const noexcept { + return Blob.Hash(); + } + + bool IsEvicting() const { + return State == EEvictState::EVICTING; + } + + bool IsExternal() const { + if (State == EEvictState::EXTERN) { + Y_ABORT_UNLESS(ExternBlob.IsValid()); + return true; + } + return false; + } + + TString ToString() const { + return TStringBuilder() << "state: " << (ui32)State + << " blob: " << Blob.ToStringNew() + << " extern: " << ExternBlob.ToStringNew() + << " cached: " << CachedBlob.ToStringNew(); + } +}; + +} + +inline +IOutputStream& operator <<(IOutputStream& out, const 
NKikimr::NOlap::TUnifiedBlobId& blobId) { + return out << blobId.ToStringNew(); +} + +inline +IOutputStream& operator <<(IOutputStream& out, const NKikimr::NOlap::TBlobRange& blobRange) { + return out << blobRange.ToString(); +} + +inline +IOutputStream& operator <<(IOutputStream& out, const NKikimr::NOlap::TBlobRangeLink16& blobRange) { + return out << blobRange.ToString(); +} + +template<> +struct ::THash { + inline ui64 operator()(const NKikimr::NOlap::TUnifiedBlobId& a) const { + return a.Hash(); + } +}; + +template <> +struct THash { + inline size_t operator() (const NKikimr::NOlap::TBlobRange& key) const { + return key.Hash(); + } +}; + +template <> +struct THash { + inline size_t operator() (const NKikimr::NOlap::TEvictedBlob& key) const { + return key.Hash(); + } +}; diff --git a/ydb/core/tx/columnshard/common/protos/blob_range.proto b/ydb/core/tx/columnshard/common/protos/blob_range.proto new file mode 100644 index 000000000000..fe72258f223c --- /dev/null +++ b/ydb/core/tx/columnshard/common/protos/blob_range.proto @@ -0,0 +1,18 @@ +package NKikimrColumnShardProto; + +message TUnifiedBlobId { + optional uint32 DsGroup = 1; + optional string BlobId = 2; +} + +message TBlobRange { + optional string BlobId = 1; + optional uint64 Offset = 2; + optional uint64 Size = 3; +} + +message TBlobRangeLink16 { + optional uint32 BlobIdx = 1; + optional uint64 Offset = 2; + optional uint64 Size = 3; +} diff --git a/ydb/core/tx/columnshard/common/protos/ya.make b/ydb/core/tx/columnshard/common/protos/ya.make index 500d562e466e..872e967bc018 100644 --- a/ydb/core/tx/columnshard/common/protos/ya.make +++ b/ydb/core/tx/columnshard/common/protos/ya.make @@ -2,6 +2,7 @@ PROTO_LIBRARY() SRCS( snapshot.proto + blob_range.proto ) PEERDIR( diff --git a/ydb/core/tx/columnshard/common/tablet_id.cpp b/ydb/core/tx/columnshard/common/tablet_id.cpp new file mode 100644 index 000000000000..37cff9105404 --- /dev/null +++ b/ydb/core/tx/columnshard/common/tablet_id.cpp @@ -0,0 +1,12 @@ 
+#include "tablet_id.h" +#include +#include + +namespace NKikimr::NOlap { + +} + +template <> +void Out(IOutputStream& os, TTypeTraits::TFuncParam val) { + os << (ui64)val; +} diff --git a/ydb/core/tx/columnshard/common/tablet_id.h b/ydb/core/tx/columnshard/common/tablet_id.h new file mode 100644 index 000000000000..ca1f59d64c77 --- /dev/null +++ b/ydb/core/tx/columnshard/common/tablet_id.h @@ -0,0 +1,8 @@ +#pragma once +#include + +namespace NKikimr::NOlap { +enum class TTabletId: ui64 { +}; + +} diff --git a/ydb/core/tx/columnshard/common/tests/shard_reader.h b/ydb/core/tx/columnshard/common/tests/shard_reader.h index 777c93d8f1f5..1bb3ad353835 100644 --- a/ydb/core/tx/columnshard/common/tests/shard_reader.h +++ b/ydb/core/tx/columnshard/common/tests/shard_reader.h @@ -6,7 +6,6 @@ #include #include #include -#include #include #include #include @@ -220,8 +219,8 @@ class TShardReader { if (auto* evData = std::get<0>(event)) { auto b = evData->ArrowBatch; if (b) { - NArrow::TStatusValidator::Validate(b->ValidateFull()); - ResultBatches.push_back(b); + ResultBatches.push_back(NArrow::ToBatch(b, true)); + NArrow::TStatusValidator::Validate(ResultBatches.back()->ValidateFull()); } else { AFL_VERIFY(evData->Finished); } diff --git a/ydb/core/tx/columnshard/common/ya.make b/ydb/core/tx/columnshard/common/ya.make index a73340ea9681..993b47fff695 100644 --- a/ydb/core/tx/columnshard/common/ya.make +++ b/ydb/core/tx/columnshard/common/ya.make @@ -6,6 +6,8 @@ SRCS( scalars.cpp snapshot.cpp portion.cpp + tablet_id.cpp + blob.cpp ) PEERDIR( diff --git a/ydb/core/tx/columnshard/config.clang-format b/ydb/core/tx/columnshard/config.clang-format new file mode 100644 index 000000000000..f7d926ac1df2 --- /dev/null +++ b/ydb/core/tx/columnshard/config.clang-format @@ -0,0 +1,41 @@ +BasedOnStyle: Google +IndentWidth: 4 +ColumnLimit: 232 +SpacesBeforeTrailingComments: 3 +InsertBraces : true +DerivePointerAlignment: true +PointerAlignment: Left +AccessModifierOffset: -4 
+IndentCaseLabels: true +ShortBlockStyle: SBS_Never +AllowShortCaseLabelsOnASingleLine : false +AllowShortCompoundRequirementOnASingleLine : false +AllowShortEnumsOnASingleLine : false +AllowShortFunctionsOnASingleLine : false +AllowShortLambdasOnASingleLine : false +ShortIfStyle: SIS_Never +ShortLambdaStyle: SLS_None +AllowShortLoopsOnASingleLine : false +AllowAllParametersOfDeclarationOnNextLine: true +ConstructorInitializerAllOnOneLineOrOnePerLine: false +PackConstructorInitializersNever: true +BreakConstructorInitializersBeforeComma: true +IncludeCategories: + - Regex: '^"[0-9,a-z,A-Z,_]*"' + Priority: 0 + SortPriority: 0 + - Regex: '^".*' + Priority: 3 + SortPriority: 3 + - Regex: '^<.*ydb/core.*' + Priority: 5 + SortPriority: 5 + - Regex: '^\<.*ydb/.*' + Priority: 10 + SortPriority: 10 + - Regex: '^\<.*\/.*' + Priority: 15 + SortPriority: 15 + - Regex: '.*' + Priority: 20 + SortPriority: 20 diff --git a/ydb/core/tx/columnshard/counters/blobs_manager.cpp b/ydb/core/tx/columnshard/counters/blobs_manager.cpp index 8e67ada3932c..1a590fb36387 100644 --- a/ydb/core/tx/columnshard/counters/blobs_manager.cpp +++ b/ydb/core/tx/columnshard/counters/blobs_manager.cpp @@ -52,7 +52,7 @@ void TBlobsManagerCounters::OnBlobsKeep(const TSet& blobs) const { // BlobsKeepBytes->Set(size); } -void TBlobsManagerCounters::OnBlobsDelete(const TSet& /*blobs*/) const { +void TBlobsManagerCounters::OnBlobsDelete(const NOlap::TTabletsByBlob& /*blobs*/) const { // BlobsDeleteCount->Set(blobs.size()); // ui64 size = 0; // for (auto&& i : blobs) { diff --git a/ydb/core/tx/columnshard/counters/blobs_manager.h b/ydb/core/tx/columnshard/counters/blobs_manager.h index ef11e508261f..8fdc25436267 100644 --- a/ydb/core/tx/columnshard/counters/blobs_manager.h +++ b/ydb/core/tx/columnshard/counters/blobs_manager.h @@ -1,7 +1,14 @@ #pragma once +#include "common/owner.h" + #include +#include + #include -#include "common/owner.h" + +namespace NKikimr::NOlap { +class TTabletsByBlob; +} namespace 
NKikimr::NColumnShard { @@ -48,7 +55,7 @@ class TBlobsManagerCounters: public TCommonCountersOwner { void OnBlobsKeep(const TSet& blobs) const; - void OnBlobsDelete(const TSet& /*blobs*/) const; + void OnBlobsDelete(const NOlap::TTabletsByBlob& blobs) const; void OnAddSmallBlob(const ui32 bSize) const { AddSmallBlobBytes->Add(bSize); diff --git a/ydb/core/tx/columnshard/counters/columnshard.cpp b/ydb/core/tx/columnshard/counters/columnshard.cpp index e094fbbcd5ac..64c7e2931c5f 100644 --- a/ydb/core/tx/columnshard/counters/columnshard.cpp +++ b/ydb/core/tx/columnshard/counters/columnshard.cpp @@ -24,14 +24,18 @@ TCSCounters::TCSCounters() FutureIndexationInputBytes = TBase::GetDeriviative("FutureIndexationInput/Bytes"); IndexationInputBytes = TBase::GetDeriviative("IndexationInput/Bytes"); - OverloadInsertTableBytes = TBase::GetDeriviative("OverloadInsertTable/Bytes"); - OverloadInsertTableCount = TBase::GetDeriviative("OverloadInsertTable/Count"); - OverloadShardTxBytes = TBase::GetDeriviative("OverloadShard/Tx/Bytes"); - OverloadShardTxCount = TBase::GetDeriviative("OverloadShard/Tx/Count"); - OverloadShardWritesBytes = TBase::GetDeriviative("OverloadShard/Writes/Bytes"); - OverloadShardWritesCount = TBase::GetDeriviative("OverloadShard/Writes/Count"); - OverloadShardWritesSizeBytes = TBase::GetDeriviative("OverloadShard/WritesSize/Bytes"); - OverloadShardWritesSizeCount = TBase::GetDeriviative("OverloadShard/WritesSize/Count"); + IndexMetadataLimitBytes = TBase::GetValue("IndexMetadata/Limit/Bytes"); + + OverloadInsertTableBytes = TBase::GetDeriviative("Overload/InsertTable/Bytes"); + OverloadInsertTableCount = TBase::GetDeriviative("Overload/InsertTable/Count"); + OverloadMetadataBytes = TBase::GetDeriviative("Overload/Metadata/Bytes"); + OverloadMetadataCount = TBase::GetDeriviative("Overload/Metadata/Count"); + OverloadShardTxBytes = TBase::GetDeriviative("Overload/Shard/Tx/Bytes"); + OverloadShardTxCount = TBase::GetDeriviative("Overload/Shard/Tx/Count"); + 
OverloadShardWritesBytes = TBase::GetDeriviative("Overload/Shard/Writes/Bytes"); + OverloadShardWritesCount = TBase::GetDeriviative("Overload/Shard/Writes/Count"); + OverloadShardWritesSizeBytes = TBase::GetDeriviative("Overload/Shard/WritesSize/Bytes"); + OverloadShardWritesSizeCount = TBase::GetDeriviative("Overload/Shard/WritesSize/Count"); InternalCompactionGranuleBytes = TBase::GetValueAutoAggregationsClient("InternalCompaction/Bytes"); InternalCompactionGranulePortionsCount = TBase::GetValueAutoAggregationsClient("InternalCompaction/PortionsCount"); diff --git a/ydb/core/tx/columnshard/counters/columnshard.h b/ydb/core/tx/columnshard/counters/columnshard.h index 2441ca6fef3b..6bada377df17 100644 --- a/ydb/core/tx/columnshard/counters/columnshard.h +++ b/ydb/core/tx/columnshard/counters/columnshard.h @@ -34,8 +34,12 @@ class TCSCounters: public TCommonCountersOwner { NMonitoring::TDynamicCounters::TCounterPtr FutureIndexationInputBytes; NMonitoring::TDynamicCounters::TCounterPtr IndexationInputBytes; + NMonitoring::TDynamicCounters::TCounterPtr IndexMetadataLimitBytes; + NMonitoring::TDynamicCounters::TCounterPtr OverloadInsertTableBytes; NMonitoring::TDynamicCounters::TCounterPtr OverloadInsertTableCount; + NMonitoring::TDynamicCounters::TCounterPtr OverloadMetadataBytes; + NMonitoring::TDynamicCounters::TCounterPtr OverloadMetadataCount; NMonitoring::TDynamicCounters::TCounterPtr OverloadShardTxBytes; NMonitoring::TDynamicCounters::TCounterPtr OverloadShardTxCount; NMonitoring::TDynamicCounters::TCounterPtr OverloadShardWritesBytes; @@ -131,6 +135,11 @@ class TCSCounters: public TCommonCountersOwner { OverloadInsertTableCount->Add(1); } + void OnOverloadMetadata(const ui64 size) const { + OverloadMetadataBytes->Add(size); + OverloadMetadataCount->Add(1); + } + void OnOverloadShardTx(const ui64 size) const { OverloadShardTxBytes->Add(size); OverloadShardTxCount->Add(1); @@ -164,6 +173,10 @@ class TCSCounters: public TCommonCountersOwner { 
IndexationInputBytes->Add(size); } + void OnIndexMetadataLimit(const ui64 limit) const { + IndexMetadataLimitBytes->Set(limit); + } + void OnStartBackground() const { StartBackgroundCount->Add(1); } diff --git a/ydb/core/tx/columnshard/counters/common/histogram.cpp b/ydb/core/tx/columnshard/counters/common/histogram.cpp new file mode 100644 index 000000000000..096ea8dc5c08 --- /dev/null +++ b/ydb/core/tx/columnshard/counters/common/histogram.cpp @@ -0,0 +1,83 @@ +#include "histogram.h" + +namespace NKikimr::NColumnShard { + +TIncrementalHistogram::TIncrementalHistogram(const TString& moduleId, const TString& metricId, const TString& category, const std::set& values) + : TBase(moduleId) +{ + DeepSubGroup("metric", metricId); + if (category) { + DeepSubGroup("category", category); + } + std::optional predName; + for (auto&& i : values) { + if (!predName) { + Counters.emplace(i, TBase::GetValue("(-Inf," + ::ToString(i) + "]")); + } else { + Counters.emplace(i, TBase::GetValue("(" + *predName + "," + ::ToString(i) + "]")); + } + predName = ::ToString(i); + } + Y_ABORT_UNLESS(predName); + PlusInf = TBase::GetValue("(" + *predName + ",+Inf)"); +} + +TIncrementalHistogram::TIncrementalHistogram(const TString& moduleId, const TString& metricId, const TString& category, const std::map& values) + : TBase(moduleId) +{ + DeepSubGroup("metric", metricId); + if (category) { + DeepSubGroup("category", category); + } + std::optional predName; + for (auto&& i : values) { + if (!predName) { + Counters.emplace(i.first, TBase::GetValue("(-Inf," + i.second + "]")); + } else { + Counters.emplace(i.first, TBase::GetValue("(" + *predName + "," + i.second + "]")); + } + predName = i.second; + } + Y_ABORT_UNLESS(predName); + PlusInf = TBase::GetValue("(" + *predName + ",+Inf)"); +} + +TDeriviativeHistogram::TDeriviativeHistogram(const TString& moduleId, const TString& signalName, const TString& category, const std::set& values) + : TBase(moduleId) +{ + if (category) { + 
DeepSubGroup("category", category); + } + std::optional predName; + for (auto&& i : values) { + if (!predName) { + Counters.emplace(i, CreateSubGroup("bin", "(-Inf," + ::ToString(i) + "]").GetDeriviative(signalName)); + } else { + Counters.emplace(i, CreateSubGroup("bin", "(" + *predName + "," + ::ToString(i) + "]").GetDeriviative(signalName)); + } + predName = ::ToString(i); + } + Y_ABORT_UNLESS(predName); + PlusInf = CreateSubGroup("bin", "(" + *predName + ",+Inf)").GetDeriviative(signalName); +} + +TDeriviativeHistogram::TDeriviativeHistogram(const TString& moduleId, const TString& signalName, const TString& category, const std::map& values) + : TBase(moduleId) +{ + if (category) { + DeepSubGroup("category", category); + } + std::optional predName; + for (auto&& i : values) { + if (!predName) { + Counters.emplace(i.first, CreateSubGroup("bin", "(-Inf," + i.second + "]").GetDeriviative(signalName)); + } else { + Counters.emplace(i.first, CreateSubGroup("bin", "(" + *predName + "," + i.second + "]").GetDeriviative(signalName)); + } + predName = i.second; + } + Y_ABORT_UNLESS(predName); + PlusInf = CreateSubGroup("bin", "(" + *predName + ",+Inf)").GetDeriviative(signalName); +} + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/counters/common/histogram.h b/ydb/core/tx/columnshard/counters/common/histogram.h new file mode 100644 index 000000000000..fcd1170ec142 --- /dev/null +++ b/ydb/core/tx/columnshard/counters/common/histogram.h @@ -0,0 +1,118 @@ +#pragma once +#include "owner.h" +#include +#include +#include + +namespace NKikimr::NColumnShard { + +class TIncrementalHistogram: public TCommonCountersOwner { +private: + using TBase = TCommonCountersOwner; + std::map Counters; + NMonitoring::TDynamicCounters::TCounterPtr PlusInf; + + NMonitoring::TDynamicCounters::TCounterPtr GetQuantile(const i64 value) const { + auto it = Counters.lower_bound(value); + if (it == Counters.end()) { + return PlusInf; + } else { + return it->second; + } + } 
+public: + + class TGuard { + private: + class TLineGuard { + private: + NMonitoring::TDynamicCounters::TCounterPtr Counter; + i64 Value = 0; + public: + TLineGuard(NMonitoring::TDynamicCounters::TCounterPtr counter) + : Counter(counter) { + + } + + ~TLineGuard() { + Sub(Value); + } + + void Add(const i64 value) { + Counter->Add(value); + Value += value; + } + + void Sub(const i64 value) { + Counter->Sub(value); + Value -= value; + Y_ABORT_UNLESS(Value >= 0); + } + }; + + std::map Counters; + TLineGuard PlusInf; + + TLineGuard& GetLineGuard(const i64 value) { + auto it = Counters.lower_bound(value); + if (it == Counters.end()) { + return PlusInf; + } else { + return it->second; + } + } + public: + TGuard(const TIncrementalHistogram& owner) + : PlusInf(owner.PlusInf) { + for (auto&& i : owner.Counters) { + Counters.emplace(i.first, TLineGuard(i.second)); + } + } + void Add(const i64 value, const i64 count) { + GetLineGuard(value).Add(count); + } + + void Sub(const i64 value, const i64 count) { + GetLineGuard(value).Sub(count); + } + }; + + std::shared_ptr BuildGuard() const { + return std::make_shared(*this); + } + + TIncrementalHistogram(const TString& moduleId, const TString& metricId, const TString& category, const std::set& values); + + TIncrementalHistogram(const TString& moduleId, const TString& metricId, const TString& category, const std::map& values); + +}; + +class TDeriviativeHistogram: public TCommonCountersOwner { +private: + using TBase = TCommonCountersOwner; + std::map Counters; + NMonitoring::TDynamicCounters::TCounterPtr PlusInf; + + NMonitoring::TDynamicCounters::TCounterPtr GetQuantile(const i64 value) const { + auto it = Counters.lower_bound(value); + if (it == Counters.end()) { + return PlusInf; + } else { + return it->second; + } + } +public: + + void Collect(const i64 volume, const ui32 count = 1) const { + GetQuantile(volume)->Add(count); + + } + + TDeriviativeHistogram(const TString& moduleId, const TString& signalName, const TString& 
category, const std::set& values); + + TDeriviativeHistogram(const TString& moduleId, const TString& signalName, const TString& category, const std::map& values); + +}; + + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/counters/common/ya.make b/ydb/core/tx/columnshard/counters/common/ya.make index 804699ec52bd..b2e330fc8b9b 100644 --- a/ydb/core/tx/columnshard/counters/common/ya.make +++ b/ydb/core/tx/columnshard/counters/common/ya.make @@ -6,6 +6,7 @@ SRCS( owner.cpp private.cpp object_counter.cpp + histogram.cpp ) PEERDIR( diff --git a/ydb/core/tx/columnshard/counters/engine_logs.cpp b/ydb/core/tx/columnshard/counters/engine_logs.cpp index 07b2844e97de..e9daf966e196 100644 --- a/ydb/core/tx/columnshard/counters/engine_logs.cpp +++ b/ydb/core/tx/columnshard/counters/engine_logs.cpp @@ -2,6 +2,8 @@ #include #include #include +#include + #include namespace NKikimr::NColumnShard { @@ -38,35 +40,82 @@ TEngineLogsCounters::TEngineLogsCounters() PortionToDropCount = TBase::GetDeriviative("Ttl/PortionToDrop/Count"); PortionToDropBytes = TBase::GetDeriviative("Ttl/PortionToDrop/Bytes"); + PortionToDropLag = TBase::GetHistogram("Ttl/PortionToDrop/Lag/Duration", NMonitoring::ExponentialHistogram(18, 2, 5)); + SkipDeleteWithProcessMemory = TBase::GetHistogram("Ttl/PortionToDrop/Skip/ProcessMemory/Lag/Duration", NMonitoring::ExponentialHistogram(18, 2, 5)); + SkipDeleteWithTxLimit = TBase::GetHistogram("Ttl/PortionToDrop/Skip/TxLimit/Lag/Duration", NMonitoring::ExponentialHistogram(18, 2, 5)); PortionToEvictCount = TBase::GetDeriviative("Ttl/PortionToEvict/Count"); PortionToEvictBytes = TBase::GetDeriviative("Ttl/PortionToEvict/Bytes"); + PortionToEvictLag = TBase::GetHistogram("Ttl/PortionToEvict/Lag/Duration", NMonitoring::ExponentialHistogram(18, 2, 5)); + SkipEvictionWithProcessMemory = TBase::GetHistogram("Ttl/PortionToEvict/Skip/ProcessMemory/Lag/Duration", NMonitoring::ExponentialHistogram(18, 2, 5)); + SkipEvictionWithTxLimit = 
TBase::GetHistogram("Ttl/PortionToEvict/Skip/TxLimit/Lag/Duration", NMonitoring::ExponentialHistogram(18, 2, 5)); + + ActualizationTaskSizeRemove = TBase::GetHistogram("Actualization/RemoveTasks/Size", NMonitoring::ExponentialHistogram(18, 2)); + ActualizationTaskSizeEvict = TBase::GetHistogram("Actualization/EvictTasks/Size", NMonitoring::ExponentialHistogram(18, 2)); + + ActualizationSkipRWProgressCount = TBase::GetDeriviative("Actualization/Skip/RWProgress/Count"); + ActualizationSkipTooFreshPortion = TBase::GetHistogram("Actualization//Skip/TooFresh/Duration", NMonitoring::LinearHistogram(12, 0, 360)); PortionNoTtlColumnCount = TBase::GetDeriviative("Ttl/PortionNoTtlColumn/Count"); PortionNoTtlColumnBytes = TBase::GetDeriviative("Ttl/PortionNoTtlColumn/Bytes"); PortionNoBorderCount = TBase::GetDeriviative("Ttl/PortionNoBorder/Count"); PortionNoBorderBytes = TBase::GetDeriviative("Ttl/PortionNoBorder/Bytes"); + + GranuleOptimizerLocked = TBase::GetDeriviative("Optimizer/Granules/Locked"); + + IndexMetadataUsageBytes = TBase::GetValue("IndexMetadata/Usage/Bytes"); + + StatUsageForTTLCount = TBase::GetDeriviative("Ttl/StatUsageForTTLCount/Count"); + ChunkUsageForTTLCount = TBase::GetDeriviative("Ttl/ChunkUsageForTTLCount/Count"); +} + +void TEngineLogsCounters::OnActualizationTask(const ui32 evictCount, const ui32 removeCount) const { + AFL_VERIFY(evictCount * removeCount == 0)("evict", evictCount)("remove", removeCount); + AFL_VERIFY(evictCount + removeCount); + if (evictCount) { + ActualizationTaskSizeEvict->Collect(evictCount); + } else { + ActualizationTaskSizeRemove->Collect(removeCount); + } } void TEngineLogsCounters::TPortionsInfoGuard::OnNewPortion(const std::shared_ptr& portion) const { const ui32 producedId = (ui32)(portion->HasRemoveSnapshot() ? 
NOlap::NPortion::EProduced::INACTIVE : portion->GetMeta().Produced); Y_ABORT_UNLESS(producedId < BlobGuards.size()); - for (auto&& i : portion->GetBlobIds()) { - BlobGuards[producedId]->Add(i.BlobSize(), i.BlobSize()); + for (auto&& i : portion->GetRecords()) { + BlobGuards[producedId]->Add(i.GetBlobRange().Size, i.GetBlobRange().Size); + } + for (auto&& i : portion->GetIndexes()) { + BlobGuards[producedId]->Add(i.GetBlobRange().Size, i.GetBlobRange().Size); } PortionRecordCountGuards[producedId]->Add(portion->GetRecordsCount(), 1); - PortionSizeGuards[producedId]->Add(portion->GetBlobBytes(), 1); + PortionSizeGuards[producedId]->Add(portion->GetTotalBlobBytes(), 1); } void TEngineLogsCounters::TPortionsInfoGuard::OnDropPortion(const std::shared_ptr& portion) const { const ui32 producedId = (ui32)(portion->HasRemoveSnapshot() ? NOlap::NPortion::EProduced::INACTIVE : portion->GetMeta().Produced); Y_ABORT_UNLESS(producedId < BlobGuards.size()); - for (auto&& i : portion->GetBlobIds()) { - BlobGuards[producedId]->Sub(i.BlobSize(), i.BlobSize()); + for (auto&& i : portion->GetRecords()) { + BlobGuards[producedId]->Sub(i.GetBlobRange().Size, i.GetBlobRange().Size); + } + for (auto&& i : portion->GetIndexes()) { + BlobGuards[producedId]->Sub(i.GetBlobRange().Size, i.GetBlobRange().Size); } PortionRecordCountGuards[producedId]->Sub(portion->GetRecordsCount(), 1); - PortionSizeGuards[producedId]->Sub(portion->GetBlobBytes(), 1); + PortionSizeGuards[producedId]->Sub(portion->GetTotalBlobBytes(), 1); +} + +NKikimr::NColumnShard::TBaseGranuleDataClassSummary TBaseGranuleDataClassSummary::operator+(const TBaseGranuleDataClassSummary& item) const { + TBaseGranuleDataClassSummary result; + result.TotalPortionsSize = TotalPortionsSize + item.TotalPortionsSize; + result.ColumnPortionsSize = ColumnPortionsSize + item.ColumnPortionsSize; + AFL_VERIFY(result.TotalPortionsSize >= 0); + AFL_VERIFY(result.ColumnPortionsSize >= 0); + result.PortionsCount = PortionsCount + 
item.PortionsCount; + result.RecordsCount = RecordsCount + item.RecordsCount; + result.MetadataMemoryPortionsSize = MetadataMemoryPortionsSize + item.MetadataMemoryPortionsSize; + return result; } } diff --git a/ydb/core/tx/columnshard/counters/engine_logs.h b/ydb/core/tx/columnshard/counters/engine_logs.h index 20aa0a9757c8..97a4716652c3 100644 --- a/ydb/core/tx/columnshard/counters/engine_logs.h +++ b/ydb/core/tx/columnshard/counters/engine_logs.h @@ -1,5 +1,6 @@ #pragma once #include "common/owner.h" +#include "common/histogram.h" #include #include #include @@ -13,12 +14,17 @@ namespace NKikimr::NColumnShard { class TBaseGranuleDataClassSummary { protected: - i64 PortionsSize = 0; + i64 ColumnPortionsSize = 0; + i64 TotalPortionsSize = 0; i64 PortionsCount = 0; i64 RecordsCount = 0; + i64 MetadataMemoryPortionsSize = 0; public: - i64 GetPortionsSize() const { - return PortionsSize; + i64 GetColumnPortionsSize() const { + return ColumnPortionsSize; + } + i64 GetTotalPortionsSize() const { + return TotalPortionsSize; } i64 GetRecordsCount() const { return RecordsCount; @@ -26,18 +32,21 @@ class TBaseGranuleDataClassSummary { i64 GetPortionsCount() const { return PortionsCount; } + i64 GetMetadataMemoryPortionsSize() const { + return MetadataMemoryPortionsSize; + } TString DebugString() const { - return TStringBuilder() << "size:" << PortionsSize << ";count:" << PortionsCount << ";"; + return TStringBuilder() << + "columns_size:" << ColumnPortionsSize << + ";total_size:" << TotalPortionsSize << + ";count:" << PortionsCount << + ";metadata_portions_size:" << MetadataMemoryPortionsSize << + ";records_count:" << RecordsCount << + ";"; } - TBaseGranuleDataClassSummary operator+(const TBaseGranuleDataClassSummary& item) const { - TBaseGranuleDataClassSummary result; - result.PortionsSize = PortionsSize + item.PortionsSize; - result.PortionsCount = PortionsCount + item.PortionsCount; - result.RecordsCount = RecordsCount + item.RecordsCount; - return result; - } + 
TBaseGranuleDataClassSummary operator+(const TBaseGranuleDataClassSummary& item) const; }; class TDataClassCounters { @@ -53,7 +62,7 @@ class TDataClassCounters { } void OnPortionsInfo(const TBaseGranuleDataClassSummary& dataInfo) const { - PortionsSize->SetValue(dataInfo.GetPortionsSize()); + PortionsSize->SetValue(dataInfo.GetTotalPortionsSize()); PortionsCount->SetValue(dataInfo.GetPortionsCount()); } }; @@ -113,143 +122,45 @@ class TAgentGranuleDataCounters { } }; -class TIncrementalHistogram: public TCommonCountersOwner { -private: - using TBase = TCommonCountersOwner; - std::map Counters; - NMonitoring::TDynamicCounters::TCounterPtr PlusInf; - - NMonitoring::TDynamicCounters::TCounterPtr GetQuantile(const i64 value) const { - auto it = Counters.lower_bound(value); - if (it == Counters.end()) { - return PlusInf; - } else { - return it->second; - } - } -public: - - class TGuard { - private: - class TLineGuard { - private: - NMonitoring::TDynamicCounters::TCounterPtr Counter; - i64 Value = 0; - public: - TLineGuard(NMonitoring::TDynamicCounters::TCounterPtr counter) - : Counter(counter) - { - - } - - ~TLineGuard() { - Sub(Value); - } - - void Add(const i64 value) { - Counter->Add(value); - Value += value; - } - - void Sub(const i64 value) { - Counter->Sub(value); - Value -= value; - Y_ABORT_UNLESS(Value >= 0); - } - }; - - std::map Counters; - TLineGuard PlusInf; - - TLineGuard& GetLineGuard(const i64 value) { - auto it = Counters.lower_bound(value); - if (it == Counters.end()) { - return PlusInf; - } else { - return it->second; - } - } - public: - TGuard(const TIncrementalHistogram& owner) - : PlusInf(owner.PlusInf) - { - for (auto&& i : owner.Counters) { - Counters.emplace(i.first, TLineGuard(i.second)); - } - } - void Add(const i64 value, const i64 count) { - GetLineGuard(value).Add(count); - } - - void Sub(const i64 value, const i64 count) { - GetLineGuard(value).Sub(count); - } - }; - - std::shared_ptr BuildGuard() const { - return std::make_shared(*this); 
- } - - TIncrementalHistogram(const TString& moduleId, const TString& metricId, const TString& category, const std::set& values) - : TBase(moduleId) { - DeepSubGroup("metric", metricId); - if (category) { - DeepSubGroup("category", category); - } - std::optional predName; - for (auto&& i : values) { - if (!predName) { - Counters.emplace(i, TBase::GetValue("(-Inf," + ::ToString(i) + "]")); - } else { - Counters.emplace(i, TBase::GetValue("(" + *predName + "," + ::ToString(i) + "]")); - } - predName = ::ToString(i); - } - Y_ABORT_UNLESS(predName); - PlusInf = TBase::GetValue("(" + *predName + ",+Inf)"); - } - - TIncrementalHistogram(const TString& moduleId, const TString& metricId, const TString& category, const std::map& values) - : TBase(moduleId) - { - DeepSubGroup("metric", metricId); - if (category) { - DeepSubGroup("category", category); - } - std::optional predName; - for (auto&& i : values) { - if (!predName) { - Counters.emplace(i.first, TBase::GetValue("(-Inf," + i.second + "]")); - } else { - Counters.emplace(i.first, TBase::GetValue("(" + *predName + "," + i.second + "]")); - } - predName = i.second; - } - Y_ABORT_UNLESS(predName); - PlusInf = TBase::GetValue("(" + *predName + ",+Inf)"); - } - -}; - class TEngineLogsCounters: public TCommonCountersOwner { private: using TBase = TCommonCountersOwner; NMonitoring::TDynamicCounters::TCounterPtr PortionToDropCount; NMonitoring::TDynamicCounters::TCounterPtr PortionToDropBytes; + NMonitoring::THistogramPtr PortionToDropLag; + NMonitoring::THistogramPtr SkipDeleteWithProcessMemory; + NMonitoring::THistogramPtr SkipDeleteWithTxLimit; NMonitoring::TDynamicCounters::TCounterPtr PortionToEvictCount; NMonitoring::TDynamicCounters::TCounterPtr PortionToEvictBytes; + NMonitoring::THistogramPtr PortionToEvictLag; + NMonitoring::THistogramPtr SkipEvictionWithProcessMemory; + NMonitoring::THistogramPtr SkipEvictionWithTxLimit; + + NMonitoring::THistogramPtr ActualizationTaskSizeRemove; + NMonitoring::THistogramPtr 
ActualizationTaskSizeEvict; + + NMonitoring::TDynamicCounters::TCounterPtr ActualizationSkipRWProgressCount; + NMonitoring::THistogramPtr ActualizationSkipTooFreshPortion; NMonitoring::TDynamicCounters::TCounterPtr PortionNoTtlColumnCount; NMonitoring::TDynamicCounters::TCounterPtr PortionNoTtlColumnBytes; + NMonitoring::TDynamicCounters::TCounterPtr StatUsageForTTLCount; + NMonitoring::TDynamicCounters::TCounterPtr ChunkUsageForTTLCount; + NMonitoring::TDynamicCounters::TCounterPtr PortionNoBorderCount; NMonitoring::TDynamicCounters::TCounterPtr PortionNoBorderBytes; + NMonitoring::TDynamicCounters::TCounterPtr GranuleOptimizerLocked; + + NMonitoring::TDynamicCounters::TCounterPtr IndexMetadataUsageBytes; + TAgentGranuleDataCounters GranuleDataAgent; std::vector> BlobSizeDistribution; std::vector> PortionSizeDistribution; std::vector> PortionRecordsDistribution; + public: class TPortionsInfoGuard { @@ -279,6 +190,8 @@ class TEngineLogsCounters: public TCommonCountersOwner { }; + void OnActualizationTask(const ui32 evictCount, const ui32 removeCount) const; + TPortionsInfoGuard BuildPortionBlobsGuard() const { return TPortionsInfoGuard(BlobSizeDistribution, PortionSizeDistribution, PortionRecordsDistribution); } @@ -287,14 +200,40 @@ class TEngineLogsCounters: public TCommonCountersOwner { return GranuleDataAgent.RegisterClient(); } - void OnPortionToEvict(const ui64 size) const { + void OnActualizationSkipRWProgress() const { + ActualizationSkipRWProgressCount->Add(1); + } + + void OnActualizationSkipTooFreshPortion(const TDuration dWait) const { + ActualizationSkipTooFreshPortion->Collect(dWait.Seconds()); + } + + void OnSkipDeleteWithProcessMemory(const TDuration lag) const { + SkipDeleteWithProcessMemory->Collect(lag.Seconds()); + } + + void OnSkipDeleteWithTxLimit(const TDuration lag) const { + SkipDeleteWithTxLimit->Collect(lag.Seconds()); + } + + void OnSkipEvictionWithProcessMemory(const TDuration lag) const { + 
SkipEvictionWithProcessMemory->Collect(lag.Seconds()); + } + + void OnSkipEvictionWithTxLimit(const TDuration lag) const { + SkipEvictionWithTxLimit->Collect(lag.Seconds()); + } + + void OnPortionToEvict(const ui64 size, const TDuration lag) const { PortionToEvictCount->Add(1); PortionToEvictBytes->Add(size); + PortionToEvictLag->Collect(lag.Seconds()); } - void OnPortionToDrop(const ui64 size) const { + void OnPortionToDrop(const ui64 size, const TDuration lag) const { PortionToDropCount->Add(1); PortionToDropBytes->Add(size); + PortionToDropLag->Collect(lag.Seconds()); } void OnPortionNoTtlColumn(const ui64 size) const { @@ -302,11 +241,27 @@ class TEngineLogsCounters: public TCommonCountersOwner { PortionNoTtlColumnBytes->Add(size); } + void OnChunkUsageForTTL() const { + ChunkUsageForTTLCount->Add(1); + } + + void OnStatUsageForTTL() const { + StatUsageForTTLCount->Add(1); + } + void OnPortionNoBorder(const ui64 size) const { PortionNoBorderCount->Add(1); PortionNoBorderBytes->Add(size); } + void OnIndexMetadataUsageBytes(const ui64 size) const { + IndexMetadataUsageBytes->Set(size); + } + + void OnGranuleOptimizerLocked() const { + GranuleOptimizerLocked->Add(1); + } + TEngineLogsCounters(); }; diff --git a/ydb/core/tx/columnshard/counters/indexation.cpp b/ydb/core/tx/columnshard/counters/indexation.cpp index cbe62ae08259..d8ee87920d23 100644 --- a/ydb/core/tx/columnshard/counters/indexation.cpp +++ b/ydb/core/tx/columnshard/counters/indexation.cpp @@ -8,6 +8,7 @@ TIndexationCounters::TIndexationCounters(const TString& module) : TBase(module) { ReadBytes = TBase::GetDeriviative("Read/Bytes"); + ReadErrors = TBase::GetDeriviative("Read/Errors/Count"); AnalizeInsertedPortions = TBase::GetDeriviative("AnalizeInsertion/Portions"); AnalizeInsertedBytes = TBase::GetDeriviative("AnalizeInsertion/Bytes"); RepackedInsertedPortions = TBase::GetDeriviative("RepackedInsertion/Portions"); diff --git a/ydb/core/tx/columnshard/counters/indexation.h 
b/ydb/core/tx/columnshard/counters/indexation.h index 4b926d260910..fdbbc659492b 100644 --- a/ydb/core/tx/columnshard/counters/indexation.h +++ b/ydb/core/tx/columnshard/counters/indexation.h @@ -13,6 +13,7 @@ class TIndexationCounters: public TCommonCountersOwner { public: NMonitoring::TDynamicCounters::TCounterPtr CompactionInputBytes; + NMonitoring::TDynamicCounters::TCounterPtr ReadErrors; NMonitoring::TDynamicCounters::TCounterPtr ReadBytes; NMonitoring::TDynamicCounters::TCounterPtr AnalizeCompactedPortions; NMonitoring::TDynamicCounters::TCounterPtr AnalizeInsertedPortions; diff --git a/ydb/core/tx/columnshard/counters/scan.cpp b/ydb/core/tx/columnshard/counters/scan.cpp index bf279913648e..075aa0e880ec 100644 --- a/ydb/core/tx/columnshard/counters/scan.cpp +++ b/ydb/core/tx/columnshard/counters/scan.cpp @@ -16,8 +16,6 @@ TScanCounters::TScanCounters(const TString& module) , NoResultsAckRequest(TBase::GetDeriviative("NoResultsAckRequest")) , AckWaitingDuration(TBase::GetDeriviative("AckWaitingDuration")) - , ScanDuration(TBase::GetDeriviative("ScanDuration")) - , NoScanRecords(TBase::GetDeriviative("NoScanRecords")) , NoScanIntervals(TBase::GetDeriviative("NoScanIntervals")) , LinearScanRecords(TBase::GetDeriviative("LinearScanRecords")) @@ -61,8 +59,44 @@ TScanCounters::TScanCounters(const TString& module) , BlobsReceivedCount(TBase::GetDeriviative("BlobsReceivedCount")) , BlobsReceivedBytes(TBase::GetDeriviative("BlobsReceivedBytes")) { + HistogramIntervalMemoryRequiredOnFail = TBase::GetHistogram("IntervalMemory/RequiredOnFail/Gb", NMonitoring::LinearHistogram(10, 1, 1)); + HistogramIntervalMemoryReduceSize = TBase::GetHistogram("IntervalMemory/Reduce/Gb", NMonitoring::ExponentialHistogram(8, 2, 1)); + HistogramIntervalMemoryRequiredAfterReduce = TBase::GetHistogram("IntervalMemory/RequiredAfterReduce/Mb", NMonitoring::ExponentialHistogram(10, 2, 64)); +/* + { + const std::map borders = {{0, "0"}, {512LLU * 1024 * 1024, "0.5Gb"}, {1LLU * 1024 * 1024 * 
1024, "1Gb"}, + {2LLU * 1024 * 1024 * 1024, "2Gb"}, {3LLU * 1024 * 1024 * 1024, "3Gb"}, + {4LLU * 1024 * 1024 * 1024, "4Gb"}, {5LLU * 1024 * 1024 * 1024, "5Gb"}, + {6LLU * 1024 * 1024 * 1024, "6Gb"}, {7LLU * 1024 * 1024 * 1024, "7Gb"}, {8LLU * 1024 * 1024 * 1024, "8Gb"}}; + HistogramIntervalMemoryRequiredOnFail = std::make_shared(module, "IntervalMemory/RequiredOnFail/Bytes", "", borders); + } + { + const std::map borders = {{0, "0"}, {512LLU * 1024 * 1024, "0.5Gb"}, {1LLU * 1024 * 1024 * 1024, "1Gb"}, + {2LLU * 1024 * 1024 * 1024, "2Gb"}, + {4LLU * 1024 * 1024 * 1024, "4Gb"}, + {8LLU * 1024 * 1024 * 1024, "8Gb"}, {16LLU * 1024 * 1024 * 1024, "16Gb"}, {32LLU * 1024 * 1024 * 1024, "32Gb"}}; + HistogramIntervalMemoryReduceSize = std::make_shared(module, "IntervalMemory/Reduce/Bytes", "", borders); + } + { + const std::map borders = {{0, "0"}, {64LLU * 1024 * 1024, "64Mb"}, + {128LLU * 1024 * 1024, "128Mb"}, {256LLU * 1024 * 1024, "256Mb"}, {512LLU * 1024 * 1024, "512Mb"}, + {1024LLU * 1024 * 1024, "1024Mb"}, {2LLU * 1024 * 1024 * 1024, "2Gb"}, {3LLU * 1024 * 1024 * 1024, "3Gb"} + }; + HistogramIntervalMemoryRequiredAfterReduce = std::make_shared(module, "IntervalMemory/RequiredAfterReduce/Bytes", "", borders); + } +*/ + ScanIntervalState = std::make_shared(*this); ResourcesSubscriberCounters = std::make_shared(); - + ScanDurationByStatus.resize((ui32)EStatusFinish::COUNT); + ui32 idx = 0; + for (auto&& i : GetEnumAllValues()) { + if (i == EStatusFinish::COUNT) { + continue; + } + ScanDurationByStatus[(ui32)i] = TBase::GetHistogram("ScanDuration/" + ::ToString(i) + "/Milliseconds", NMonitoring::ExponentialHistogram(18, 2, 1)); + AFL_VERIFY(idx == (ui32)i); + ++idx; + } } NKikimr::NColumnShard::TScanAggregations TScanCounters::BuildAggregations() { diff --git a/ydb/core/tx/columnshard/counters/scan.h b/ydb/core/tx/columnshard/counters/scan.h index fd08804b10f6..d9bbd6b898ce 100644 --- a/ydb/core/tx/columnshard/counters/scan.h +++ 
b/ydb/core/tx/columnshard/counters/scan.h @@ -1,5 +1,6 @@ #pragma once #include "common/owner.h" +#include "common/histogram.h" #include #include #include @@ -40,6 +41,77 @@ class TScanAggregations: public TCommonCountersOwner { }; class TScanCounters: public TCommonCountersOwner { +public: + enum class EIntervalStatus { + Undefined = 0, + WaitResources, + WaitSources, + WaitMergerStart, + WaitMergerContinue, + WaitPartialReply, + + COUNT + }; + + enum class EStatusFinish { + Success /* "Success" */ = 0, + ConveyorInternalError /* "ConveyorInternalError" */, + ExternalAbort /* "ExternalAbort" */, + IteratorInternalErrorScan /* "IteratorInternalErrorScan" */, + IteratorInternalErrorResult /* "IteratorInternalErrorResult" */, + Deadline /* "Deadline" */, + UndeliveredEvent /* "UndeliveredEvent" */, + CannotAddInFlight /* "CannotAddInFlight" */, + ProblemOnStart /*ProblemOnStart*/, + + COUNT + }; + + class TScanIntervalState { + private: + std::vector ValuesByStatus; + public: + TScanIntervalState(const TScanCounters& counters) { + ValuesByStatus.resize((ui32)EIntervalStatus::COUNT); + for (auto&& i : GetEnumAllValues()) { + if (i == EIntervalStatus::COUNT) { + continue; + } + ValuesByStatus[(ui32)i] = counters.CreateSubGroup("status", ::ToString(i)).GetValue("Intervals/Count"); + } + } + void Add(const EIntervalStatus status) const { + AFL_VERIFY((ui32)status < ValuesByStatus.size()); + ValuesByStatus[(ui32)status]->Add(1); + } + void Remove(const EIntervalStatus status) const { + AFL_VERIFY((ui32)status < ValuesByStatus.size()); + ValuesByStatus[(ui32)status]->Sub(1); + } + }; + + class TScanIntervalStateGuard { + private: + EIntervalStatus Status = EIntervalStatus::Undefined; + const std::shared_ptr BaseCounters; + public: + TScanIntervalStateGuard(const std::shared_ptr& baseCounters) + : BaseCounters(baseCounters) + { + BaseCounters->Add(Status); + } + + ~TScanIntervalStateGuard() { + BaseCounters->Remove(Status); + } + + void SetStatus(const EIntervalStatus 
status) { + BaseCounters->Remove(Status); + Status = status; + BaseCounters->Add(Status); + } + }; + private: using TBase = TCommonCountersOwner; NMonitoring::TDynamicCounters::TCounterPtr ProcessingOverload; @@ -54,7 +126,7 @@ class TScanCounters: public TCommonCountersOwner { NMonitoring::TDynamicCounters::TCounterPtr NoResultsAckRequest; NMonitoring::TDynamicCounters::TCounterPtr AckWaitingDuration; - NMonitoring::TDynamicCounters::TCounterPtr ScanDuration; + std::vector ScanDurationByStatus; NMonitoring::TDynamicCounters::TCounterPtr NoScanRecords; NMonitoring::TDynamicCounters::TCounterPtr NoScanIntervals; @@ -62,11 +134,18 @@ class TScanCounters: public TCommonCountersOwner { NMonitoring::TDynamicCounters::TCounterPtr LinearScanIntervals; NMonitoring::TDynamicCounters::TCounterPtr LogScanRecords; NMonitoring::TDynamicCounters::TCounterPtr LogScanIntervals; + std::shared_ptr ScanIntervalState; + NMonitoring::THistogramPtr HistogramIntervalMemoryRequiredOnFail; + NMonitoring::THistogramPtr HistogramIntervalMemoryReduceSize; + NMonitoring::THistogramPtr HistogramIntervalMemoryRequiredAfterReduce; public: - std::shared_ptr ResourcesSubscriberCounters; + TScanIntervalStateGuard CreateIntervalStateGuard() const { + return TScanIntervalStateGuard(ScanIntervalState); + } + std::shared_ptr ResourcesSubscriberCounters; NMonitoring::TDynamicCounters::TCounterPtr PortionBytes; NMonitoring::TDynamicCounters::TCounterPtr FilterBytes; @@ -106,6 +185,18 @@ class TScanCounters: public TCommonCountersOwner { TScanCounters(const TString& module = "Scan"); + void OnOptimizedIntervalMemoryFailed(const ui64 memoryRequired) const { + HistogramIntervalMemoryRequiredOnFail->Collect(memoryRequired / (1024.0 * 1024.0 * 1024.0)); + } + + void OnOptimizedIntervalMemoryReduced(const ui64 memoryReduceVolume) const { + HistogramIntervalMemoryReduceSize->Collect(memoryReduceVolume / (1024.0 * 1024.0 * 1024.0)); + } + + void OnOptimizedIntervalMemoryRequired(const ui64 memoryRequired) const { 
+ HistogramIntervalMemoryRequiredAfterReduce->Collect(memoryRequired / (1024.0 * 1024.0)); + } + void OnNoScanInterval(const ui32 recordsCount) const { NoScanRecords->Add(recordsCount); NoScanIntervals->Add(1); @@ -121,8 +212,9 @@ class TScanCounters: public TCommonCountersOwner { LogScanIntervals->Add(1); } - void OnScanDuration(const TDuration d) const { - ScanDuration->Add(d.MicroSeconds()); + void OnScanDuration(const EStatusFinish status, const TDuration d) const { + AFL_VERIFY((ui32)status < ScanDurationByStatus.size()); + ScanDurationByStatus[(ui32)status]->Collect(d.MilliSeconds()); } void AckWaitingInfo(const TDuration d) const { diff --git a/ydb/core/tx/columnshard/counters/ya.make b/ydb/core/tx/columnshard/counters/ya.make index 5a0dbc7c3256..65797cb34752 100644 --- a/ydb/core/tx/columnshard/counters/ya.make +++ b/ydb/core/tx/columnshard/counters/ya.make @@ -18,5 +18,6 @@ PEERDIR( ) GENERATE_ENUM_SERIALIZATION(columnshard.h) +GENERATE_ENUM_SERIALIZATION(scan.h) END() diff --git a/ydb/core/tx/columnshard/data_locks/locks/abstract.cpp b/ydb/core/tx/columnshard/data_locks/locks/abstract.cpp new file mode 100644 index 000000000000..d5c44aa83685 --- /dev/null +++ b/ydb/core/tx/columnshard/data_locks/locks/abstract.cpp @@ -0,0 +1,5 @@ +#include "abstract.h" + +namespace NKikimr::NOlap::NDataLocks { + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/data_locks/locks/abstract.h b/ydb/core/tx/columnshard/data_locks/locks/abstract.h new file mode 100644 index 000000000000..2e301d62a512 --- /dev/null +++ b/ydb/core/tx/columnshard/data_locks/locks/abstract.h @@ -0,0 +1,52 @@ +#pragma once +#include + +#include + +#include +#include +#include + +namespace NKikimr::NOlap { +class TPortionInfo; +class TGranuleMeta; +} + +namespace NKikimr::NOlap::NDataLocks { + +class ILock { +private: + YDB_READONLY_DEF(TString, LockName); + YDB_READONLY_FLAG(ReadOnly, false); +protected: + virtual std::optional DoIsLocked(const TPortionInfo& portion) const = 0; + 
virtual std::optional DoIsLocked(const TGranuleMeta& granule) const = 0; + virtual bool DoIsEmpty() const = 0; +public: + ILock(const TString& lockName, const bool isReadOnly = false) + : LockName(lockName) + , ReadOnlyFlag(isReadOnly) + { + + } + + virtual ~ILock() = default; + + std::optional IsLocked(const TPortionInfo& portion, const bool readOnly = false) const { + if (IsReadOnly() && readOnly) { + return {}; + } + return DoIsLocked(portion); + } + std::optional IsLocked(const TGranuleMeta& g, const bool readOnly = false) const { + if (IsReadOnly() && readOnly) { + return {}; + } + return DoIsLocked(g); + } + bool IsEmpty() const { + return DoIsEmpty(); + } +}; + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/data_locks/locks/composite.cpp b/ydb/core/tx/columnshard/data_locks/locks/composite.cpp new file mode 100644 index 000000000000..0663ff68fb20 --- /dev/null +++ b/ydb/core/tx/columnshard/data_locks/locks/composite.cpp @@ -0,0 +1,5 @@ +#include "composite.h" + +namespace NKikimr::NOlap::NDataLocks { + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/data_locks/locks/composite.h b/ydb/core/tx/columnshard/data_locks/locks/composite.h new file mode 100644 index 000000000000..fd23ee15f85e --- /dev/null +++ b/ydb/core/tx/columnshard/data_locks/locks/composite.h @@ -0,0 +1,54 @@ +#pragma once +#include "abstract.h" + +namespace NKikimr::NOlap::NDataLocks { + +class TCompositeLock: public ILock { +private: + using TBase = ILock; + std::vector> Locks; +protected: + virtual std::optional DoIsLocked(const TPortionInfo& portion) const override { + for (auto&& i : Locks) { + if (auto lockName = i->IsLocked(portion)) { + return lockName; + } + } + return {}; + } + virtual std::optional DoIsLocked(const TGranuleMeta& granule) const override { + for (auto&& i : Locks) { + if (auto lockName = i->IsLocked(granule)) { + return lockName; + } + } + return {}; + } + bool DoIsEmpty() const override { + return Locks.empty(); + } +public: + 
TCompositeLock(const TString& lockName, const std::vector>& locks, const bool readOnly = false) + : TBase(lockName, readOnly) + { + for (auto&& l : locks) { + if (!l || l->IsEmpty()) { + continue; + } + Locks.emplace_back(l); + } + } + + TCompositeLock(const TString& lockName, std::initializer_list> locks, const bool readOnly = false) + : TBase(lockName, readOnly) + { + for (auto&& l : locks) { + if (!l || l->IsEmpty()) { + continue; + } + Locks.emplace_back(l); + } + } +}; + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/data_locks/locks/list.cpp b/ydb/core/tx/columnshard/data_locks/locks/list.cpp new file mode 100644 index 000000000000..c0dbcb9ac327 --- /dev/null +++ b/ydb/core/tx/columnshard/data_locks/locks/list.cpp @@ -0,0 +1,5 @@ +#include "list.h" + +namespace NKikimr::NOlap::NDataLocks { + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/data_locks/locks/list.h b/ydb/core/tx/columnshard/data_locks/locks/list.h new file mode 100644 index 000000000000..e74251e30b42 --- /dev/null +++ b/ydb/core/tx/columnshard/data_locks/locks/list.h @@ -0,0 +1,96 @@ +#pragma once +#include "abstract.h" +#include +#include + +namespace NKikimr::NOlap::NDataLocks { + +class TListPortionsLock: public ILock { +private: + using TBase = ILock; + THashSet Portions; + THashSet Granules; +protected: + virtual std::optional DoIsLocked(const TPortionInfo& portion) const override { + if (Portions.contains(portion.GetAddress())) { + return GetLockName(); + } + return {}; + } + virtual std::optional DoIsLocked(const TGranuleMeta& granule) const override { + if (Granules.contains(granule.GetPathId())) { + return GetLockName(); + } + return {}; + } + bool DoIsEmpty() const override { + return Portions.empty(); + } +public: + TListPortionsLock(const TString& lockName, const std::vector>& portions, const bool readOnly = false) + : TBase(lockName, readOnly) + { + for (auto&& p : portions) { + Portions.emplace(p->GetAddress()); + 
Granules.emplace(p->GetPathId()); + } + } + + TListPortionsLock(const TString& lockName, const std::vector& portions, const bool readOnly = false) + : TBase(lockName, readOnly) { + for (auto&& p : portions) { + Portions.emplace(p.GetAddress()); + Granules.emplace(p.GetPathId()); + } + } + + template + TListPortionsLock(const TString& lockName, const std::vector& portions, const TGetter& g, const bool readOnly = false) + : TBase(lockName, readOnly) { + for (auto&& p : portions) { + const auto address = g(p); + Portions.emplace(address); + Granules.emplace(address.GetPathId()); + } + } + + template + TListPortionsLock(const TString& lockName, const THashMap& portions, const bool readOnly = false) + : TBase(lockName, readOnly) { + for (auto&& p : portions) { + const auto address = p.first; + Portions.emplace(address); + Granules.emplace(address.GetPathId()); + } + } +}; + +class TListTablesLock: public ILock { +private: + using TBase = ILock; + THashSet Tables; +protected: + virtual std::optional DoIsLocked(const TPortionInfo& portion) const override { + if (Tables.contains(portion.GetPathId())) { + return GetLockName(); + } + return {}; + } + virtual std::optional DoIsLocked(const TGranuleMeta& granule) const override { + if (Tables.contains(granule.GetPathId())) { + return GetLockName(); + } + return {}; + } + bool DoIsEmpty() const override { + return Tables.empty(); + } +public: + TListTablesLock(const TString& lockName, const THashSet& tables, const bool readOnly = false) + : TBase(lockName, readOnly) + , Tables(tables) + { + } +}; + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/data_locks/locks/snapshot.cpp b/ydb/core/tx/columnshard/data_locks/locks/snapshot.cpp new file mode 100644 index 000000000000..f2d702268364 --- /dev/null +++ b/ydb/core/tx/columnshard/data_locks/locks/snapshot.cpp @@ -0,0 +1,5 @@ +#include "snapshot.h" + +namespace NKikimr::NOlap::NDataLocks { + +} \ No newline at end of file diff --git 
a/ydb/core/tx/columnshard/data_locks/locks/snapshot.h b/ydb/core/tx/columnshard/data_locks/locks/snapshot.h new file mode 100644 index 000000000000..c1f6e10b06e7 --- /dev/null +++ b/ydb/core/tx/columnshard/data_locks/locks/snapshot.h @@ -0,0 +1,36 @@ +#pragma once +#include "abstract.h" +#include +#include + +namespace NKikimr::NOlap::NDataLocks { + +class TSnapshotLock: public ILock { +private: + using TBase = ILock; + const TSnapshot SnapshotBarrier; + const THashSet PathIds; +protected: + virtual std::optional DoIsLocked(const TPortionInfo& portion) const override { + if (PathIds.contains((TTabletId)portion.GetPathId()) && portion.RecordSnapshotMin() <= SnapshotBarrier) { + return GetLockName(); + } + return {}; + } + virtual std::optional DoIsLocked(const TGranuleMeta& granule) const override { + if (PathIds.contains((TTabletId)granule.GetPathId())) { + return GetLockName(); + } + return {}; + } +public: + TSnapshotLock(const TString& lockName, const TSnapshot& snapshotBarrier, const THashSet& pathIds, const bool readOnly = false) + : TBase(lockName, readOnly) + , SnapshotBarrier(snapshotBarrier) + , PathIds(pathIds) + { + + } +}; + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/data_locks/locks/ya.make b/ydb/core/tx/columnshard/data_locks/locks/ya.make new file mode 100644 index 000000000000..debee52e8724 --- /dev/null +++ b/ydb/core/tx/columnshard/data_locks/locks/ya.make @@ -0,0 +1,14 @@ +LIBRARY() + +SRCS( + abstract.cpp + list.cpp + snapshot.cpp + composite.cpp +) + +PEERDIR( + ydb/core/tx/columnshard/engines/storage +) + +END() diff --git a/ydb/core/tx/columnshard/data_locks/manager/manager.cpp b/ydb/core/tx/columnshard/data_locks/manager/manager.cpp new file mode 100644 index 000000000000..d97912d594f3 --- /dev/null +++ b/ydb/core/tx/columnshard/data_locks/manager/manager.cpp @@ -0,0 +1,48 @@ +#include "manager.h" +#include + +namespace NKikimr::NOlap::NDataLocks { + +std::shared_ptr TManager::RegisterLock(const std::shared_ptr& 
lock) { + AFL_VERIFY(lock); + AFL_VERIFY(ProcessLocks.emplace(lock->GetLockName(), lock).second)("process_id", lock->GetLockName()); + return std::make_shared(lock->GetLockName(), StopFlag); +} + +void TManager::UnregisterLock(const TString& processId) { + AFL_VERIFY(ProcessLocks.erase(processId))("process_id", processId); +} + +std::optional TManager::IsLocked(const TPortionInfo& portion) const { + for (auto&& i : ProcessLocks) { + if (auto lockName = i.second->IsLocked(portion)) { + return lockName; + } + } + return {}; +} + +std::optional TManager::IsLocked(const TGranuleMeta& granule) const { + for (auto&& i : ProcessLocks) { + if (auto lockName = i.second->IsLocked(granule)) { + return lockName; + } + } + return {}; +} + +void TManager::Stop() { + AFL_VERIFY(StopFlag->Inc() == 1); +} + +TManager::TGuard::~TGuard() { + AFL_VERIFY(Released || !NActors::TlsActivationContext || StopFlag->Val() == 1); +} + +void TManager::TGuard::Release(TManager& manager) { + AFL_VERIFY(!Released); + manager.UnregisterLock(ProcessId); + Released = true; +} + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/data_locks/manager/manager.h b/ydb/core/tx/columnshard/data_locks/manager/manager.h new file mode 100644 index 000000000000..9075397d67ec --- /dev/null +++ b/ydb/core/tx/columnshard/data_locks/manager/manager.h @@ -0,0 +1,46 @@ +#pragma once +#include +#include +#include +#include + +namespace NKikimr::NOlap::NDataLocks { + +class TManager { +private: + THashMap> ProcessLocks; + std::shared_ptr StopFlag = std::make_shared(0); + void UnregisterLock(const TString& processId); +public: + TManager() = default; + + void Stop(); + + class TGuard { + private: + const TString ProcessId; + std::shared_ptr StopFlag; + bool Released = false; + public: + TGuard(const TString& processId, const std::shared_ptr& stopFlag) + : ProcessId(processId) + , StopFlag(stopFlag) + { + + } + ~TGuard(); + + void Release(TManager& manager); + }; + + [[nodiscard]] std::shared_ptr 
RegisterLock(const std::shared_ptr& lock); + template + [[nodiscard]] std::shared_ptr RegisterLock(Args&&... args) { + return RegisterLock(std::make_shared(args...)); + } + std::optional IsLocked(const TPortionInfo& portion) const; + std::optional IsLocked(const TGranuleMeta& granule) const; + +}; + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/data_locks/manager/ya.make b/ydb/core/tx/columnshard/data_locks/manager/ya.make new file mode 100644 index 000000000000..4a5d8b2437f3 --- /dev/null +++ b/ydb/core/tx/columnshard/data_locks/manager/ya.make @@ -0,0 +1,11 @@ +LIBRARY() + +SRCS( + manager.cpp +) + +PEERDIR( + ydb/core/tx/columnshard/data_locks/locks +) + +END() diff --git a/ydb/core/tx/columnshard/data_locks/ya.make b/ydb/core/tx/columnshard/data_locks/ya.make new file mode 100644 index 000000000000..9212eb1c9582 --- /dev/null +++ b/ydb/core/tx/columnshard/data_locks/ya.make @@ -0,0 +1,8 @@ +LIBRARY() + +PEERDIR( + ydb/core/tx/columnshard/data_locks/manager + ydb/core/tx/columnshard/data_locks/locks +) + +END() diff --git a/ydb/core/tx/columnshard/data_sharing/common/context/context.cpp b/ydb/core/tx/columnshard/data_sharing/common/context/context.cpp new file mode 100644 index 000000000000..64f32b59b920 --- /dev/null +++ b/ydb/core/tx/columnshard/data_sharing/common/context/context.cpp @@ -0,0 +1,62 @@ +#include "context.h" +#include + +namespace NKikimr::NOlap::NDataSharing { + +NKikimrColumnShardDataSharingProto::TTransferContext TTransferContext::SerializeToProto() const { + NKikimrColumnShardDataSharingProto::TTransferContext result; + result.SetDestinationTabletId((ui64)DestinationTabletId); + for (auto&& i : SourceTabletIds) { + result.AddSourceTabletIds((ui64)i); + } + SnapshotBarrier.SerializeToProto(*result.MutableSnapshotBarrier()); + result.SetMoving(Moving); + return result; +} + +NKikimr::TConclusionStatus TTransferContext::DeserializeFromProto(const NKikimrColumnShardDataSharingProto::TTransferContext& proto) { + 
DestinationTabletId = (TTabletId)proto.GetDestinationTabletId(); + if (!(ui64)DestinationTabletId) { + return TConclusionStatus::Fail("incorrect DestinationTabletId in proto"); + } + for (auto&& i : proto.GetSourceTabletIds()) { + AFL_VERIFY(SourceTabletIds.emplace((TTabletId)i).second); + } + Moving = proto.GetMoving(); + { + if (!proto.HasSnapshotBarrier()) { + return TConclusionStatus::Fail("SnapshotBarrier not initialized in proto."); + } + auto snapshotParse = SnapshotBarrier.DeserializeFromProto(proto.GetSnapshotBarrier()); + if (!snapshotParse) { + return snapshotParse; + } + if (!SnapshotBarrier.Valid()) { + return TConclusionStatus::Fail("SnapshotBarrier must be valid in proto."); + } + } + return TConclusionStatus::Success(); +} + +bool TTransferContext::IsEqualTo(const TTransferContext& context) const { + return + DestinationTabletId == context.DestinationTabletId && + SourceTabletIds == context.SourceTabletIds && + Moving == context.Moving && + SnapshotBarrier == context.SnapshotBarrier; +} + +TString TTransferContext::DebugString() const { + return TStringBuilder() << "{from=" << (ui64)DestinationTabletId << ";moving=" << Moving << ";snapshot=" << SnapshotBarrier.DebugString() << "}"; +} + +TTransferContext::TTransferContext(const TTabletId destination, const THashSet& sources, const TSnapshot& snapshotBarrier, const bool moving) + : DestinationTabletId(destination) + , SourceTabletIds(sources) + , Moving(moving) + , SnapshotBarrier(snapshotBarrier) +{ + AFL_VERIFY(!sources.contains(destination)); +} + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/data_sharing/common/context/context.h b/ydb/core/tx/columnshard/data_sharing/common/context/context.h new file mode 100644 index 000000000000..b124bc7a07ea --- /dev/null +++ b/ydb/core/tx/columnshard/data_sharing/common/context/context.h @@ -0,0 +1,29 @@ +#pragma once +#include +#include +#include + +#include +#include + +#include + +namespace NKikimr::NOlap::NDataSharing { + +class 
TTransferContext { +private: + YDB_READONLY(TTabletId, DestinationTabletId, (TTabletId)0); + YDB_READONLY_DEF(THashSet, SourceTabletIds); + YDB_READONLY(bool, Moving, false); + YDB_READONLY(TSnapshot, SnapshotBarrier, TSnapshot::Zero()); +public: + TTransferContext() = default; + bool IsEqualTo(const TTransferContext& context) const; + TString DebugString() const; + + TTransferContext(const TTabletId destination, const THashSet& sources, const TSnapshot& snapshotBarrier, const bool moving); + NKikimrColumnShardDataSharingProto::TTransferContext SerializeToProto() const; + TConclusionStatus DeserializeFromProto(const NKikimrColumnShardDataSharingProto::TTransferContext& proto); +}; + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/data_sharing/common/context/ya.make b/ydb/core/tx/columnshard/data_sharing/common/context/ya.make new file mode 100644 index 000000000000..c5f45338b5f6 --- /dev/null +++ b/ydb/core/tx/columnshard/data_sharing/common/context/ya.make @@ -0,0 +1,13 @@ +LIBRARY() + +SRCS( + context.cpp +) + +PEERDIR( + ydb/core/tx/columnshard/data_sharing/protos + ydb/library/actors/core + ydb/library/conclusion +) + +END() diff --git a/ydb/core/tx/columnshard/data_sharing/common/session/common.cpp b/ydb/core/tx/columnshard/data_sharing/common/session/common.cpp new file mode 100644 index 000000000000..bb22359d925a --- /dev/null +++ b/ydb/core/tx/columnshard/data_sharing/common/session/common.cpp @@ -0,0 +1,66 @@ +#include "common.h" + +#include +#include +#include + +#include + +namespace NKikimr::NOlap::NDataSharing { + +TString TCommonSession::DebugString() const { + return TStringBuilder() << "{id=" << SessionId << ";context=" << TransferContext.DebugString() << ";}"; +} + +bool TCommonSession::Start(const NColumnShard::TColumnShard& shard) { + const NActors::TLogContextGuard lGuard = NActors::TLogContextBuilder::Build()("info", Info); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("info", "Start"); + AFL_VERIFY(!IsStartingFlag); + 
IsStartingFlag = true; + AFL_VERIFY(!IsStartedFlag); + const auto& index = shard.GetIndexAs(); + THashMap>> portionsByPath; + std::vector> portionsLock; + THashMap> local; + for (auto&& i : GetPathIdsForStart()) { +// const auto insertTableSnapshot = shard.GetInsertTable().GetMinCommittedSnapshot(i); +// const auto shardSnapshot = shard.GetLastCompletedTx(); +// if (shard.GetInsertTable().GetMinCommittedSnapshot(i).value_or(shard.GetLastPlannedSnapshot()) <= GetSnapshotBarrier()) { +// AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("insert_table_snapshot", insertTableSnapshot)("last_completed_tx", shardSnapshot)("barrier", GetSnapshotBarrier()); +// IsStartingFlag = false; +// return false; +// } + auto& portionsVector = portionsByPath[i]; + const auto& g = index.GetGranuleVerified(i); + for (auto&& p : g.GetPortionsOlderThenSnapshot(GetSnapshotBarrier())) { + if (shard.GetDataLocksManager()->IsLocked(*p.second)) { + IsStartingFlag = false; + return false; + } + portionsVector.emplace_back(p.second); + portionsLock.emplace_back(p.second); + } + } + + IsStartedFlag = DoStart(shard, portionsByPath); + if (IsFinishedFlag) { + IsStartedFlag = false; + } + if (IsStartedFlag) { + AFL_VERIFY(!LockGuard); + LockGuard = shard.GetDataLocksManager()->RegisterLock("sharing_session:" + GetSessionId(), portionsLock, true); + } + IsStartingFlag = false; + return IsStartedFlag; +} + +void TCommonSession::Finish(const std::shared_ptr& dataLocksManager) { + AFL_VERIFY(!IsFinishedFlag); + IsFinishedFlag = true; + if (IsStartedFlag) { + AFL_VERIFY(LockGuard); + LockGuard->Release(*dataLocksManager); + } +} + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/data_sharing/common/session/common.h b/ydb/core/tx/columnshard/data_sharing/common/session/common.h new file mode 100644 index 000000000000..c3070b5e7b8e --- /dev/null +++ b/ydb/core/tx/columnshard/data_sharing/common/session/common.h @@ -0,0 +1,110 @@ +#pragma once +#include +#include +#include +#include + +#include 
+#include + +namespace NKikimr::NColumnShard { +class TColumnShard; +} + +namespace NKikimr::NOlap { +class TPortionInfo; +namespace NDataLocks { +class TManager; +} +} + +namespace NKikimr::NOlap::NDataSharing { + +class TCommonSession { +private: + static ui64 GetNextRuntimeId() { + static TAtomicCounter Counter = 0; + return (ui64)Counter.Inc(); + } + + YDB_READONLY_DEF(TString, SessionId); + const TString Info; + YDB_READONLY(ui64, RuntimeId, GetNextRuntimeId()); + std::shared_ptr LockGuard; + bool IsStartedFlag = false; + bool IsStartingFlag = false; + bool IsFinishedFlag = false; +protected: + TTransferContext TransferContext; + virtual bool DoStart(const NColumnShard::TColumnShard& shard, const THashMap>>& portions) = 0; + virtual THashSet GetPathIdsForStart() const = 0; +public: + virtual ~TCommonSession() = default; + + TCommonSession(const TString& info) + : Info(info) + { + + } + + TCommonSession(const TString& sessionId, const TString& info, const TTransferContext& transferContext) + : SessionId(sessionId) + , Info(info) + , TransferContext(transferContext) { + } + + bool IsFinished() const { + return IsFinishedFlag; + } + + bool IsStarted() const { + return IsStartedFlag; + } + + bool IsStarting() const { + return IsStartingFlag; + } + + bool IsEqualTo(const TCommonSession& item) const { + return SessionId == item.SessionId && TransferContext.IsEqualTo(item.TransferContext); + } + + bool Start(const NColumnShard::TColumnShard& shard); + void Finish(const std::shared_ptr& dataLocksManager); + + const TSnapshot& GetSnapshotBarrier() const { + return TransferContext.GetSnapshotBarrier(); + } + + TString DebugString() const; + + template + void SerializeToProto(TProto& proto) const { + AFL_VERIFY(SessionId); + *proto.MutableSessionId() = SessionId; + *proto.MutableTransferContext() = TransferContext.SerializeToProto(); + } + + template + TConclusionStatus DeserializeFromProto(const TProto& proto) { + { + SessionId = proto.GetSessionId(); + if (!SessionId) 
{ + return TConclusionStatus::Fail("SessionId not initialized in proto."); + } + } + { + if (!proto.HasTransferContext()) { + return TConclusionStatus::Fail("TransferContext not initialized in proto."); + } + auto parsing = TransferContext.DeserializeFromProto(proto.GetTransferContext()); + if (!parsing) { + return parsing; + } + } + return TConclusionStatus::Success(); + } + +}; + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/data_sharing/common/session/ya.make b/ydb/core/tx/columnshard/data_sharing/common/session/ya.make new file mode 100644 index 000000000000..2e68440a4a6b --- /dev/null +++ b/ydb/core/tx/columnshard/data_sharing/common/session/ya.make @@ -0,0 +1,14 @@ +LIBRARY() + +SRCS( + common.cpp +) + +PEERDIR( + ydb/library/conclusion + ydb/core/tx/columnshard/common + ydb/core/tx/columnshard/data_sharing/common/context + ydb/core/tablet_flat +) + +END() diff --git a/ydb/core/tx/columnshard/data_sharing/common/transactions/tx_extension.cpp b/ydb/core/tx/columnshard/data_sharing/common/transactions/tx_extension.cpp new file mode 100644 index 000000000000..8a97c55b0ed3 --- /dev/null +++ b/ydb/core/tx/columnshard/data_sharing/common/transactions/tx_extension.cpp @@ -0,0 +1,5 @@ +#include "tx_extension.h" + +namespace NKikimr::NColumnShard::NDataSharing { + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/data_sharing/common/transactions/tx_extension.h b/ydb/core/tx/columnshard/data_sharing/common/transactions/tx_extension.h new file mode 100644 index 000000000000..055081e37173 --- /dev/null +++ b/ydb/core/tx/columnshard/data_sharing/common/transactions/tx_extension.h @@ -0,0 +1,35 @@ +#pragma once +#include +#include + +namespace NKikimr::NOlap::NDataSharing { + +template +class TExtendedTransactionBase: public NTabletFlatExecutor::TTransactionBase { +private: + const TString TxInfo; + const ui32 TabletTxNo; + using TBase = NTabletFlatExecutor::TTransactionBase; + virtual bool 
DoExecute(NTabletFlatExecutor::TTransactionContext& txc, const NActors::TActorContext& ctx) = 0; + virtual void DoComplete(const NActors::TActorContext & ctx) = 0; + +public: + virtual bool Execute(NTabletFlatExecutor::TTransactionContext& txc, const NActors::TActorContext& ctx) override final { + NActors::TLogContextGuard logGuard = NActors::TLogContextBuilder::Build()("tablet_id", TBase::Self->TabletID())("tx_no", TabletTxNo)("tx_info", TxInfo); + return DoExecute(txc, ctx); + } + virtual void Complete(const NActors::TActorContext& ctx) override final { + NActors::TLogContextGuard logGuard = NActors::TLogContextBuilder::Build()("tablet_id", TBase::Self->TabletID())("tx_no", TabletTxNo)("tx_info", TxInfo); + return DoComplete(ctx); + } + + TExtendedTransactionBase(TShard* self, const TString& txInfo = Default()) + : TBase(self) + , TxInfo(txInfo) + , TabletTxNo(++TBase::Self->TabletTxCounter) + { + + } +}; + +} diff --git a/ydb/core/tx/columnshard/data_sharing/common/transactions/ya.make b/ydb/core/tx/columnshard/data_sharing/common/transactions/ya.make new file mode 100644 index 000000000000..30aa6d488922 --- /dev/null +++ b/ydb/core/tx/columnshard/data_sharing/common/transactions/ya.make @@ -0,0 +1,17 @@ +LIBRARY() + +SRCS( + tx_extension.cpp +) + +PEERDIR( + ydb/core/tablet_flat + ydb/core/tx/tiering + ydb/services/metadata/abstract + ydb/core/tx/columnshard/data_sharing/protos + ydb/core/tx/columnshard/blobs_action/protos + ydb/core/base + ydb/core/tx/tiering +) + +END() diff --git a/ydb/core/tx/columnshard/data_sharing/common/ya.make b/ydb/core/tx/columnshard/data_sharing/common/ya.make new file mode 100644 index 000000000000..1d1caafc12c6 --- /dev/null +++ b/ydb/core/tx/columnshard/data_sharing/common/ya.make @@ -0,0 +1,9 @@ +LIBRARY() + +PEERDIR( + ydb/core/tx/columnshard/data_sharing/common/session + ydb/core/tx/columnshard/data_sharing/common/transactions + ydb/core/tx/columnshard/data_sharing/common/context +) + +END() diff --git 
a/ydb/core/tx/columnshard/data_sharing/destination/events/control.cpp b/ydb/core/tx/columnshard/data_sharing/destination/events/control.cpp new file mode 100644 index 000000000000..cb0407f1aba2 --- /dev/null +++ b/ydb/core/tx/columnshard/data_sharing/destination/events/control.cpp @@ -0,0 +1,14 @@ +#include "control.h" +#include + +namespace NKikimr::NOlap::NDataSharing::NEvents { + +TEvProposeFromInitiator::TEvProposeFromInitiator(const TDestinationSession& session) { + *Record.MutableSession() = session.SerializeDataToProto(); +} + +TEvConfirmFromInitiator::TEvConfirmFromInitiator(const TString& sessionId) { + *Record.MutableSessionId() = sessionId; +} + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/data_sharing/destination/events/control.h b/ydb/core/tx/columnshard/data_sharing/destination/events/control.h new file mode 100644 index 000000000000..eff9504fa0b5 --- /dev/null +++ b/ydb/core/tx/columnshard/data_sharing/destination/events/control.h @@ -0,0 +1,33 @@ +#pragma once +#include +#include + +#include + +namespace NKikimr::NOlap::NDataSharing { +class TDestinationSession; +} + +namespace NKikimr::NOlap::NDataSharing::NEvents { + +struct TEvProposeFromInitiator: public NActors::TEventPB { + TEvProposeFromInitiator() = default; + + TEvProposeFromInitiator(const TDestinationSession& session); +}; + +struct TEvConfirmFromInitiator: public NActors::TEventPB { + TEvConfirmFromInitiator() = default; + + TEvConfirmFromInitiator(const TString& sessionId); +}; + +struct TEvAckFinishFromInitiator: public NActors::TEventPB { + TEvAckFinishFromInitiator() = default; + + TEvAckFinishFromInitiator(const TString& sharingId) { + Record.SetSessionId(sharingId); + } +}; + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/data_sharing/destination/events/status.cpp b/ydb/core/tx/columnshard/data_sharing/destination/events/status.cpp new file mode 100644 index 000000000000..e6af6b90d5ab --- /dev/null +++ 
b/ydb/core/tx/columnshard/data_sharing/destination/events/status.cpp @@ -0,0 +1,5 @@ +#include "status.h" + +namespace NKikimr::NOlap::NDataSharing::NEvents { + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/data_sharing/destination/events/status.h b/ydb/core/tx/columnshard/data_sharing/destination/events/status.h new file mode 100644 index 000000000000..7a1327be236d --- /dev/null +++ b/ydb/core/tx/columnshard/data_sharing/destination/events/status.h @@ -0,0 +1,16 @@ +#pragma once +#include +#include +#include + +namespace NKikimr::NOlap::NDataSharing::NEvents { + +struct TEvCheckStatusFromInitiator: public NActors::TEventPB { + TEvCheckStatusFromInitiator() = default; + + TEvCheckStatusFromInitiator(const TString& sessionId) { + Record.SetSessionId(sessionId); + } +}; + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/data_sharing/destination/events/transfer.cpp b/ydb/core/tx/columnshard/data_sharing/destination/events/transfer.cpp new file mode 100644 index 000000000000..55d7e8e9ea7d --- /dev/null +++ b/ydb/core/tx/columnshard/data_sharing/destination/events/transfer.cpp @@ -0,0 +1,68 @@ +#include "transfer.h" +#include +#include +#include + +namespace NKikimr::NOlap::NDataSharing::NEvents { + +THashMap TPathIdData::BuildLinkTabletTasks( + const std::shared_ptr& sharedBlobs, const TTabletId selfTabletId, const TTransferContext& context, const TVersionedIndex& index) { + THashMap> blobIds; + for (auto&& i : Portions) { + auto schema = i.GetSchema(index); + i.FillBlobIdsByStorage(blobIds, schema->GetIndexInfo()); + } + + THashMap> blobsInfo; + + for (auto&& i : blobIds) { + auto storageManager = sharedBlobs->GetStorageManagerVerified(i.first); + auto storeCategories = storageManager->BuildStoreCategories(i.second); + auto& blobs = blobsInfo[i.first]; + for (auto it = storeCategories.GetDirect().GetIterator(); it.IsValid(); ++it) { + auto itSharing = blobs.find(it.GetBlobId()); + if (itSharing == blobs.end()) { + itSharing = 
blobs.emplace(it.GetBlobId(), TBlobSharing(i.first, it.GetBlobId())).first; + } + } + for (auto it = storeCategories.GetSharing().GetIterator(); it.IsValid(); ++it) { + auto itSharing = blobs.find(it.GetBlobId()); + if (itSharing == blobs.end()) { + itSharing = blobs.emplace(it.GetBlobId(), TBlobSharing(i.first, it.GetBlobId())).first; + } + itSharing->second.AddShared(it.GetTabletId()); + } + for (auto it = storeCategories.GetBorrowed().GetIterator(); it.IsValid(); ++it) { + auto itSharing = blobs.find(it.GetBlobId()); + if (itSharing == blobs.end()) { + itSharing = blobs.emplace(it.GetBlobId(), TBlobSharing(i.first, it.GetBlobId())).first; + } + itSharing->second.AddBorrowed(it.GetTabletId()); + } + } + + THashMap globalTabletTasks; + for (auto&& [storageId, blobs] : blobsInfo) { + THashMap storageTabletTasks; + for (auto&& [_, blobInfo] : blobs) { + THashMap blobTabletTasks = context.GetMoving() ? blobInfo.BuildTabletTasksOnMove(context, selfTabletId, storageId) : blobInfo.BuildTabletTasksOnCopy(context, selfTabletId, storageId); + for (auto&& [tId, tInfo] : blobTabletTasks) { + auto itTablet = storageTabletTasks.find(tId); + if (itTablet == storageTabletTasks.end()) { + itTablet = storageTabletTasks.emplace(tId, TStorageTabletTask(storageId, tId)).first; + } + itTablet->second.Merge(tInfo); + } + } + for (auto&& i : storageTabletTasks) { + auto it = globalTabletTasks.find(i.first); + if (it == globalTabletTasks.end()) { + it = globalTabletTasks.emplace(i.first, TTaskForTablet(i.first)).first; + } + it->second.AddStorage(std::move(i.second)); + } + } + return globalTabletTasks; +} + +} diff --git a/ydb/core/tx/columnshard/data_sharing/destination/events/transfer.h b/ydb/core/tx/columnshard/data_sharing/destination/events/transfer.h new file mode 100644 index 000000000000..d4205a0db65d --- /dev/null +++ b/ydb/core/tx/columnshard/data_sharing/destination/events/transfer.h @@ -0,0 +1,107 @@ +#pragma once +#include +#include +#include +#include + +#include + 
+namespace NKikimr::NOlap { +class TVersionedIndex; +} + +namespace NKikimr::NOlap::NDataSharing { +class TSharedBlobsManager; +class TTaskForTablet; +} + +namespace NKikimr::NOlap::NDataSharing::NEvents { + +class TPathIdData { +private: + YDB_READONLY(ui64, PathId, 0); + YDB_READONLY_DEF(std::vector, Portions); + + TPathIdData() = default; + + TConclusionStatus DeserializeFromProto(const NKikimrColumnShardDataSharingProto::TPathIdData& proto, const TIndexInfo& indexInfo) { + if (!proto.HasPathId()) { + return TConclusionStatus::Fail("no path id in proto"); + } + PathId = proto.GetPathId(); + for (auto&& portionProto : proto.GetPortions()) { + TConclusion portion = TPortionInfo::BuildFromProto(portionProto, indexInfo); + if (!portion) { + return portion.GetError(); + } + Portions.emplace_back(portion.DetachResult()); + } + return TConclusionStatus::Success(); + } +public: + TPathIdData(const ui64 pathId, const std::vector& portions) + : PathId(pathId) + , Portions(portions) + { + + } + + std::vector DetachPortions() { + return std::move(Portions); + } + THashMap BuildLinkTabletTasks(const std::shared_ptr& sharedBlobs, const TTabletId selfTabletId, + const TTransferContext& context, const TVersionedIndex& index); + + void InitPortionIds(ui64* lastPortionId, const std::optional pathId = {}) { + AFL_VERIFY(lastPortionId); + for (auto&& i : Portions) { + i.SetPortion(++*lastPortionId); + if (pathId) { + i.SetPathId(*pathId); + } + } + } + + void SerializeToProto(NKikimrColumnShardDataSharingProto::TPathIdData& proto) const { + proto.SetPathId(PathId); + for (auto&& i : Portions) { + i.SerializeToProto(*proto.AddPortions()); + } + }; + + + static TConclusion BuildFromProto(const NKikimrColumnShardDataSharingProto::TPathIdData& proto, const TIndexInfo& indexInfo) { + TPathIdData result; + auto resultParsing = result.DeserializeFromProto(proto, indexInfo); + if (!resultParsing) { + return resultParsing; + } else { + return result; + } + } + +}; + +struct 
TEvSendDataFromSource: public NActors::TEventPB { + TEvSendDataFromSource() = default; + + TEvSendDataFromSource(const TString& sessionId, const ui32 packIdx, const TTabletId sourceTabletId, const THashMap& pathIdData) { + Record.SetSessionId(sessionId); + Record.SetPackIdx(packIdx); + Record.SetSourceTabletId((ui64)sourceTabletId); + for (auto&& i : pathIdData) { + i.second.SerializeToProto(*Record.AddPathIdData()); + } + } +}; + +struct TEvFinishedFromSource: public NActors::TEventPB { + TEvFinishedFromSource() = default; + + TEvFinishedFromSource(const TString& sessionId, const TTabletId sourceTabletId) { + Record.SetSessionId(sessionId); + Record.SetSourceTabletId((ui64)sourceTabletId); + } +}; + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/data_sharing/destination/events/ya.make b/ydb/core/tx/columnshard/data_sharing/destination/events/ya.make new file mode 100644 index 000000000000..89b02707d5f4 --- /dev/null +++ b/ydb/core/tx/columnshard/data_sharing/destination/events/ya.make @@ -0,0 +1,16 @@ +LIBRARY() + +SRCS( + transfer.cpp + status.cpp + control.cpp +) + +PEERDIR( + ydb/core/tx/columnshard/engines/portions + ydb/core/tx/columnshard/data_sharing/destination/session + ydb/core/tx/columnshard/data_sharing/protos + ydb/library/actors/core +) + +END() diff --git a/ydb/core/tx/columnshard/data_sharing/destination/session/destination.cpp b/ydb/core/tx/columnshard/data_sharing/destination/session/destination.cpp new file mode 100644 index 000000000000..d70fd05f9849 --- /dev/null +++ b/ydb/core/tx/columnshard/data_sharing/destination/session/destination.cpp @@ -0,0 +1,187 @@ +#include "destination.h" +#include +#include +#include +#include +#include +#include +#include +#include + +namespace NKikimr::NOlap::NDataSharing { + +NKikimr::TConclusionStatus TDestinationSession::DataReceived(THashMap&& data, TColumnEngineForLogs& index, const std::shared_ptr& /*manager*/) { + auto guard = 
index.GranulesStorage->GetStats()->StartPackModification(); + for (auto&& i : data) { + auto it = PathIds.find(i.first); + AFL_VERIFY(it != PathIds.end())("path_id_undefined", i.first); + for (auto&& portion : i.second.DetachPortions()) { + ui32 contains = 0; + ui32 notContains = 0; + THashMap> blobIds; + portion.FillBlobIdsByStorage(blobIds, index.GetVersionedIndex()); + for (auto&& s : blobIds) { + auto it = CurrentBlobIds.find(s.first); + if (it == CurrentBlobIds.end()) { + notContains += s.second.size(); + continue; + } + for (auto&& b : s.second) { + if (it->second.contains(b)) { + ++contains; + } + } + } + AFL_VERIFY(!contains || !notContains); + if (!contains) { + portion.SetPathId(it->second); + index.UpsertPortion(std::move(portion)); + } + } + } + return TConclusionStatus::Success(); +} + +void TDestinationSession::SendCurrentCursorAck(const NColumnShard::TColumnShard& shard, const std::optional tabletId) { + AFL_VERIFY(IsStarted() || IsStarting()); + bool found = false; + bool allTransfersFinished = true; + for (auto&& [_, cursor] : Cursors) { + if (!cursor.GetDataFinished()) { + allTransfersFinished = false; + } + if (tabletId && *tabletId != cursor.GetTabletId()) { + continue; + } + found = true; + if (cursor.GetDataFinished()) { + auto ev = std::make_unique(GetSessionId()); + NActors::TActivationContext::AsActorContext().Send(MakePipePeNodeCacheID(false), + new TEvPipeCache::TEvForward(ev.release(), (ui64)cursor.GetTabletId(), true), IEventHandle::FlagTrackDelivery, GetRuntimeId()); + } else if (cursor.GetPackIdx()) { + auto ev = std::make_unique(GetSessionId(), cursor.GetPackIdx()); + NActors::TActivationContext::AsActorContext().Send(MakePipePeNodeCacheID(false), + new TEvPipeCache::TEvForward(ev.release(), (ui64)cursor.GetTabletId(), true), IEventHandle::FlagTrackDelivery, GetRuntimeId()); + } else { + std::set pathIdsBase; + for (auto&& i : PathIds) { + pathIdsBase.emplace(i.first); + } + TSourceSession source(GetSessionId(), TransferContext, 
cursor.GetTabletId(), pathIdsBase, (TTabletId)shard.TabletID()); + auto ev = std::make_unique(source); + NActors::TActivationContext::AsActorContext().Send(MakePipePeNodeCacheID(false), + new TEvPipeCache::TEvForward(ev.release(), (ui64)cursor.GetTabletId(), true), IEventHandle::FlagTrackDelivery, GetRuntimeId()); + } + } + if (allTransfersFinished && !IsFinished()) { + NYDBTest::TControllers::GetColumnShardController()->OnDataSharingFinished(shard.TabletID(), GetSessionId()); + Finish(shard.GetDataLocksManager()); + InitiatorController.Finished(GetSessionId()); + } + AFL_VERIFY(found); +} + +NKikimr::TConclusion> TDestinationSession::ReceiveData( + NColumnShard::TColumnShard* self, const THashMap& data, const ui32 receivedPackIdx, const TTabletId sourceTabletId, + const std::shared_ptr& selfPtr) { + auto result = GetCursorVerified(sourceTabletId).ReceiveData(receivedPackIdx); + if (!result) { + return result; + } + return std::unique_ptr(new TTxDataFromSource(self, selfPtr, data, sourceTabletId)); +} + +NKikimr::TConclusion> TDestinationSession::ReceiveFinished(NColumnShard::TColumnShard* self, const TTabletId sourceTabletId, const std::shared_ptr& selfPtr) { + auto result = GetCursorVerified(sourceTabletId).ReceiveFinished(); + if (!result) { + return result; + } + return std::unique_ptr(new TTxFinishFromSource(self, sourceTabletId, selfPtr)); +} + +NKikimr::TConclusion> TDestinationSession::AckInitiatorFinished(NColumnShard::TColumnShard* self, const std::shared_ptr& selfPtr) { + return std::unique_ptr(new TTxFinishAckFromInitiator(self, selfPtr)); +} + +NKikimr::TConclusionStatus TDestinationSession::DeserializeDataFromProto(const NKikimrColumnShardDataSharingProto::TDestinationSession& proto, const TColumnEngineForLogs& index) { + if (!InitiatorController.DeserializeFromProto(proto.GetInitiatorController())) { + return TConclusionStatus::Fail("cannot parse initiator controller: " + proto.GetInitiatorController().DebugString()); + } + auto parseBase = 
TBase::DeserializeFromProto(proto); + if (!parseBase) { + return parseBase; + } + + for (auto&& i : TransferContext.GetSourceTabletIds()) { + Cursors.emplace(i, TSourceCursorForDestination(i)); + } + + for (auto&& i : proto.GetPathIds()) { + auto g = index.GetGranuleOptional(i.GetDestPathId()); + if (!g) { + return TConclusionStatus::Fail("Incorrect remapping into undefined path id: " + ::ToString(i.GetDestPathId())); + } + if (!i.GetSourcePathId() || !i.GetDestPathId()) { + return TConclusionStatus::Fail("PathIds remapping contains incorrect ids: " + i.DebugString()); + } + if (!PathIds.emplace(i.GetSourcePathId(), i.GetDestPathId()).second) { + return TConclusionStatus::Fail("PathIds contains duplicated values."); + } + } + if (PathIds.empty()) { + return TConclusionStatus::Fail("PathIds empty."); + } + return TConclusionStatus::Success(); +} + +NKikimrColumnShardDataSharingProto::TDestinationSession TDestinationSession::SerializeDataToProto() const { + NKikimrColumnShardDataSharingProto::TDestinationSession result; + InitiatorController.SerializeToProto(*result.MutableInitiatorController()); + TBase::SerializeToProto(result); + for (auto&& i : PathIds) { + auto* pathIdRemap = result.AddPathIds(); + pathIdRemap->SetSourcePathId(i.first); + pathIdRemap->SetDestPathId(i.second); + } + return result; +} + +NKikimrColumnShardDataSharingProto::TDestinationSession::TFullCursor TDestinationSession::SerializeCursorToProto() const { + NKikimrColumnShardDataSharingProto::TDestinationSession::TFullCursor result; + result.SetConfirmedFlag(ConfirmedFlag); + for (auto&& i : Cursors) { + *result.AddSourceCursors() = i.second.SerializeToProto(); + } + return result; +} + +NKikimr::TConclusionStatus TDestinationSession::DeserializeCursorFromProto(const NKikimrColumnShardDataSharingProto::TDestinationSession::TFullCursor& proto) { + ConfirmedFlag = proto.GetConfirmedFlag(); + for (auto&& i : proto.GetSourceCursors()) { + TSourceCursorForDestination cursor; + auto parsed = 
cursor.DeserializeFromProto(i); + if (!parsed) { + return parsed; + } + auto it = Cursors.find(cursor.GetTabletId()); + AFL_VERIFY(it != Cursors.end()); + it->second = cursor; + } + return TConclusionStatus::Success(); +} + +bool TDestinationSession::DoStart(const NColumnShard::TColumnShard& shard, const THashMap>>& portions) { + AFL_VERIFY(IsConfirmed()); + NYDBTest::TControllers::GetColumnShardController()->OnDataSharingStarted(shard.TabletID(), GetSessionId()); + THashMap> local; + for (auto&& i : portions) { + for (auto&& p : i.second) { + p->FillBlobIdsByStorage(local, shard.GetIndexAs().GetVersionedIndex()); + } + } + std::swap(CurrentBlobIds, local); + SendCurrentCursorAck(shard, {}); + return true; +} + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/data_sharing/destination/session/destination.h b/ydb/core/tx/columnshard/data_sharing/destination/session/destination.h new file mode 100644 index 000000000000..a2780449d498 --- /dev/null +++ b/ydb/core/tx/columnshard/data_sharing/destination/session/destination.h @@ -0,0 +1,134 @@ +#pragma once +#include + +#include +#include +#include +#include +#include + +namespace NKikimr::NColumnShard { +class TColumnShard; +} + +namespace NKikimr::NOlap { +class TColumnEngineForLogs; +class IStoragesManager; +} + +namespace NKikimr::NOlap::NDataSharing { + +namespace NEvents { +class TPathIdData; +} + +class TSourceCursorForDestination { +private: + YDB_READONLY(TTabletId, TabletId, (TTabletId)0); + YDB_READONLY(ui32, PackIdx, 0); + YDB_READONLY(bool, DataFinished, false); +public: + TSourceCursorForDestination() = default; + TSourceCursorForDestination(const TTabletId tabletId) + : TabletId(tabletId) + { + + } + + TConclusionStatus ReceiveData(const ui32 packIdxReceived) { + if (packIdxReceived != PackIdx + 1) { + return TConclusionStatus::Fail("inconsistency packIdx"); + } + PackIdx = packIdxReceived; + return TConclusionStatus::Success(); + } + + TConclusionStatus ReceiveFinished() { + if 
(DataFinished) { + return TConclusionStatus::Fail("inconsistency DataFinished"); + } + DataFinished = true; + return TConclusionStatus::Success(); + } + + [[nodiscard]] TConclusionStatus DeserializeFromProto(const NKikimrColumnShardDataSharingProto::TDestinationSession::TSourceCursor& proto) { + TabletId = (TTabletId)proto.GetTabletId(); + PackIdx = proto.GetPackIdx(); + DataFinished = proto.GetFinished(); + return TConclusionStatus::Success(); + } + + [[nodiscard]] NKikimrColumnShardDataSharingProto::TDestinationSession::TSourceCursor SerializeToProto() const { + NKikimrColumnShardDataSharingProto::TDestinationSession::TSourceCursor result; + result.SetTabletId((ui64)TabletId); + result.SetPackIdx(PackIdx); + result.SetFinished(DataFinished); + return result; + } + +}; + +class TDestinationSession: public TCommonSession { +private: + using TBase = TCommonSession; + YDB_READONLY_DEF(TInitiatorControllerContainer, InitiatorController); + using TPathIdsRemapper = THashMap; + YDB_READONLY_DEF(TPathIdsRemapper, PathIds); + YDB_READONLY_FLAG(Confirmed, false); + THashMap Cursors; + THashMap> CurrentBlobIds; +protected: + virtual bool DoStart(const NColumnShard::TColumnShard& shard, const THashMap>>& portions) override; + virtual THashSet GetPathIdsForStart() const override { + THashSet result; + for (auto&& i : PathIds) { + result.emplace(i.first); + } + return result; + } +public: + TSourceCursorForDestination& GetCursorVerified(const TTabletId& tabletId) { + auto it = Cursors.find(tabletId); + AFL_VERIFY(it != Cursors.end()); + return it->second; + } + + TDestinationSession(const TInitiatorControllerContainer& controller, const TPathIdsRemapper& remapper, const TString& sessionId, const TTransferContext& context) + : TBase(sessionId, "destination_base", context) + , InitiatorController(controller) + , PathIds(remapper) + { + + } + + TDestinationSession() + : TBase("dest_proto") + { + + } + + void Confirm(const bool allowRepeat = false) { + AFL_VERIFY(!ConfirmedFlag || 
allowRepeat); + ConfirmedFlag = true; + } + + [[nodiscard]] TConclusionStatus DataReceived(THashMap&& data, TColumnEngineForLogs& index, const std::shared_ptr& manager); + + void SendCurrentCursorAck(const NColumnShard::TColumnShard& shard, const std::optional tabletId); + + NKikimrColumnShardDataSharingProto::TDestinationSession SerializeDataToProto() const; + + [[nodiscard]] TConclusion> ReceiveFinished(NColumnShard::TColumnShard* self, const TTabletId sourceTabletId, const std::shared_ptr& selfPtr); + + [[nodiscard]] TConclusion> AckInitiatorFinished(NColumnShard::TColumnShard* self, const std::shared_ptr& selfPtr); + + [[nodiscard]] TConclusion> ReceiveData(NColumnShard::TColumnShard* self, const THashMap& data, + const ui32 receivedPackIdx, const TTabletId sourceTabletId, const std::shared_ptr& selfPtr); + + NKikimrColumnShardDataSharingProto::TDestinationSession::TFullCursor SerializeCursorToProto() const; + [[nodiscard]] TConclusionStatus DeserializeCursorFromProto(const NKikimrColumnShardDataSharingProto::TDestinationSession::TFullCursor& proto); + + [[nodiscard]] TConclusionStatus DeserializeDataFromProto(const NKikimrColumnShardDataSharingProto::TDestinationSession& proto, const TColumnEngineForLogs& index); +}; + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/data_sharing/destination/session/ya.make b/ydb/core/tx/columnshard/data_sharing/destination/session/ya.make new file mode 100644 index 000000000000..92588efad0f8 --- /dev/null +++ b/ydb/core/tx/columnshard/data_sharing/destination/session/ya.make @@ -0,0 +1,14 @@ +LIBRARY() + +SRCS( + destination.cpp +) + +PEERDIR( + ydb/core/tx/columnshard/data_sharing/initiator/controller + ydb/core/tx/columnshard/data_sharing/common/session + ydb/core/tx/columnshard/data_sharing/common/transactions + ydb/core/tx/columnshard/data_sharing/destination/transactions +) + +END() diff --git a/ydb/core/tx/columnshard/data_sharing/destination/transactions/tx_data_from_source.cpp 
b/ydb/core/tx/columnshard/data_sharing/destination/transactions/tx_data_from_source.cpp new file mode 100644 index 000000000000..5c779525b3f3 --- /dev/null +++ b/ydb/core/tx/columnshard/data_sharing/destination/transactions/tx_data_from_source.cpp @@ -0,0 +1,44 @@ +#include "tx_data_from_source.h" +#include +#include + +namespace NKikimr::NOlap::NDataSharing { + +bool TTxDataFromSource::DoExecute(NTabletFlatExecutor::TTransactionContext& txc, const TActorContext& /*ctx*/) { + using namespace NKikimr::NColumnShard; + TDbWrapper dbWrapper(txc.DB, nullptr); + { + ui64* lastPortionPtr = Self->TablesManager.MutablePrimaryIndexAsVerified().GetLastPortionPointer(); + for (auto&& i : PortionsByPathId) { + auto it = Session->GetPathIds().find(i.first); + AFL_VERIFY(it != Session->GetPathIds().end()); + i.second.InitPortionIds(lastPortionPtr, it->second); + } + dbWrapper.WriteCounter(TColumnEngineForLogs::LAST_PORTION, *lastPortionPtr); + } + THashMap> sharedBlobIds; + for (auto&& i : PortionsByPathId) { + for (auto&& p : i.second.GetPortions()) { + p.SaveToDatabase(dbWrapper); + } + } + NIceDb::TNiceDb db(txc.DB); + db.Table().Key(Session->GetSessionId()) + .Update(NIceDb::TUpdate(Session->SerializeCursorToProto().SerializeAsString())); + return true; +} + +void TTxDataFromSource::DoComplete(const TActorContext& /*ctx*/) { + Session->DataReceived(std::move(PortionsByPathId), Self->TablesManager.MutablePrimaryIndexAsVerified(), Self->GetStoragesManager()).Validate(); + Session->SendCurrentCursorAck(*Self, SourceTabletId); +} + +TTxDataFromSource::TTxDataFromSource(NColumnShard::TColumnShard* self, const std::shared_ptr& session, const THashMap& portionsByPathId, const TTabletId sourceTabletId) + : TBase(self) + , Session(session) + , PortionsByPathId(portionsByPathId) + , SourceTabletId(sourceTabletId) +{ +} + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/data_sharing/destination/transactions/tx_data_from_source.h 
b/ydb/core/tx/columnshard/data_sharing/destination/transactions/tx_data_from_source.h new file mode 100644 index 000000000000..82b69ac41fb6 --- /dev/null +++ b/ydb/core/tx/columnshard/data_sharing/destination/transactions/tx_data_from_source.h @@ -0,0 +1,28 @@ +#pragma once +#include +#include +#include +#include +#include +#include + +namespace NKikimr::NOlap::NDataSharing { + +class TTxDataFromSource: public TExtendedTransactionBase { +private: + using TBase = TExtendedTransactionBase; + std::shared_ptr Session; + THashMap PortionsByPathId; + THashMap> SharedBlobIds; + const TTabletId SourceTabletId; +protected: + virtual bool DoExecute(NTabletFlatExecutor::TTransactionContext& txc, const TActorContext& ctx) override; + virtual void DoComplete(const TActorContext& ctx) override; +public: + TTxDataFromSource(NColumnShard::TColumnShard* self, const std::shared_ptr& session, const THashMap& portionsByPathId, const TTabletId sourceTabletId); + + TTxType GetTxType() const override { return NColumnShard::TXTYPE_DATA_SHARING_DATA_FROM_SOURCE; } +}; + + +} diff --git a/ydb/core/tx/columnshard/data_sharing/destination/transactions/tx_finish_ack_from_initiator.cpp b/ydb/core/tx/columnshard/data_sharing/destination/transactions/tx_finish_ack_from_initiator.cpp new file mode 100644 index 000000000000..ecd5dfeac082 --- /dev/null +++ b/ydb/core/tx/columnshard/data_sharing/destination/transactions/tx_finish_ack_from_initiator.cpp @@ -0,0 +1,16 @@ +#include "tx_finish_ack_from_initiator.h" + +namespace NKikimr::NOlap::NDataSharing { + +bool TTxFinishAckFromInitiator::DoExecute(NTabletFlatExecutor::TTransactionContext& txc, const TActorContext& /*ctx*/) { + using namespace NKikimr::NColumnShard; + NIceDb::TNiceDb db(txc.DB); + db.Table().Key(Session->GetSessionId()).Delete(); + return true; +} + +void TTxFinishAckFromInitiator::DoComplete(const TActorContext& /*ctx*/) { + Self->SharingSessionsManager->RemoveDestinationSession(Session->GetSessionId()); +} + +} \ No newline at end 
of file diff --git a/ydb/core/tx/columnshard/data_sharing/destination/transactions/tx_finish_ack_from_initiator.h b/ydb/core/tx/columnshard/data_sharing/destination/transactions/tx_finish_ack_from_initiator.h new file mode 100644 index 000000000000..0d61359a1e8f --- /dev/null +++ b/ydb/core/tx/columnshard/data_sharing/destination/transactions/tx_finish_ack_from_initiator.h @@ -0,0 +1,26 @@ +#pragma once +#include +#include +#include + +namespace NKikimr::NOlap::NDataSharing { + +class TTxFinishAckFromInitiator: public TExtendedTransactionBase { +private: + using TBase = TExtendedTransactionBase; + std::shared_ptr Session; +protected: + virtual bool DoExecute(NTabletFlatExecutor::TTransactionContext& txc, const TActorContext& ctx) override; + virtual void DoComplete(const TActorContext& /*ctx*/) override; +public: + TTxFinishAckFromInitiator(NColumnShard::TColumnShard* self, const std::shared_ptr& session) + : TBase(self) + , Session(session) + { + } + + TTxType GetTxType() const override { return NColumnShard::TXTYPE_DATA_SHARING_FINISH_ACK_FROM_INITIATOR; } +}; + + +} diff --git a/ydb/core/tx/columnshard/data_sharing/destination/transactions/tx_finish_from_source.cpp b/ydb/core/tx/columnshard/data_sharing/destination/transactions/tx_finish_from_source.cpp new file mode 100644 index 000000000000..7229dafe789e --- /dev/null +++ b/ydb/core/tx/columnshard/data_sharing/destination/transactions/tx_finish_from_source.cpp @@ -0,0 +1,17 @@ +#include "tx_finish_from_source.h" + +namespace NKikimr::NOlap::NDataSharing { + +bool TTxFinishFromSource::DoExecute(NTabletFlatExecutor::TTransactionContext& txc, const TActorContext& /*ctx*/) { + using namespace NColumnShard; + NIceDb::TNiceDb db(txc.DB); + db.Table().Key(Session->GetSessionId()) + .Update(NIceDb::TUpdate(Session->SerializeCursorToProto().SerializeAsString())); + return true; +} + +void TTxFinishFromSource::DoComplete(const TActorContext& /*ctx*/) { + Session->SendCurrentCursorAck(*Self, SourceTabletId); +} + +} \ No 
newline at end of file diff --git a/ydb/core/tx/columnshard/data_sharing/destination/transactions/tx_finish_from_source.h b/ydb/core/tx/columnshard/data_sharing/destination/transactions/tx_finish_from_source.h new file mode 100644 index 000000000000..92a0bb667988 --- /dev/null +++ b/ydb/core/tx/columnshard/data_sharing/destination/transactions/tx_finish_from_source.h @@ -0,0 +1,28 @@ +#pragma once +#include +#include +#include + +namespace NKikimr::NOlap::NDataSharing { + +class TTxFinishFromSource: public TExtendedTransactionBase { +private: + using TBase = TExtendedTransactionBase; + std::shared_ptr Session; + const TTabletId SourceTabletId; +protected: + virtual bool DoExecute(NTabletFlatExecutor::TTransactionContext& txc, const TActorContext& ctx) override; + virtual void DoComplete(const TActorContext& ctx) override; +public: + TTxFinishFromSource(NColumnShard::TColumnShard* self, const TTabletId sourceTabletId, const std::shared_ptr& session) + : TBase(self) + , Session(session) + , SourceTabletId(sourceTabletId) + { + } + + TTxType GetTxType() const override { return NColumnShard::TXTYPE_DATA_SHARING_FINISH_FROM_SOURCE; } +}; + + +} diff --git a/ydb/core/tx/columnshard/data_sharing/destination/transactions/tx_start_from_initiator.cpp b/ydb/core/tx/columnshard/data_sharing/destination/transactions/tx_start_from_initiator.cpp new file mode 100644 index 000000000000..4de8f6e30882 --- /dev/null +++ b/ydb/core/tx/columnshard/data_sharing/destination/transactions/tx_start_from_initiator.cpp @@ -0,0 +1,33 @@ +#include "tx_start_from_initiator.h" + +namespace NKikimr::NOlap::NDataSharing { + +bool TTxProposeFromInitiator::DoExecute(NTabletFlatExecutor::TTransactionContext& txc, const TActorContext& /*ctx*/) { + using namespace NColumnShard; + NIceDb::TNiceDb db(txc.DB); + db.Table().Key(Session->GetSessionId()) + .Update(NIceDb::TUpdate(Session->SerializeDataToProto().SerializeAsString())); + return true; +} + +void TTxProposeFromInitiator::DoComplete(const 
TActorContext& /*ctx*/) { + AFL_VERIFY(!Session->IsConfirmed()); + AFL_VERIFY(Sessions->emplace(Session->GetSessionId(), Session).second); + Session->GetInitiatorController().ProposeSuccess(Session->GetSessionId()); +} + +bool TTxConfirmFromInitiator::DoExecute(NTabletFlatExecutor::TTransactionContext& txc, const TActorContext& /*ctx*/) { + using namespace NColumnShard; + NIceDb::TNiceDb db(txc.DB); + Session->Confirm(true); + db.Table().Key(Session->GetSessionId()) + .Update(NIceDb::TUpdate(Session->SerializeCursorToProto().SerializeAsString())); + return true; +} + +void TTxConfirmFromInitiator::DoComplete(const TActorContext& /*ctx*/) { + Session->Start(*Self); + Session->GetInitiatorController().ConfirmSuccess(Session->GetSessionId()); +} + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/data_sharing/destination/transactions/tx_start_from_initiator.h b/ydb/core/tx/columnshard/data_sharing/destination/transactions/tx_start_from_initiator.h new file mode 100644 index 000000000000..e90a2af29935 --- /dev/null +++ b/ydb/core/tx/columnshard/data_sharing/destination/transactions/tx_start_from_initiator.h @@ -0,0 +1,44 @@ +#pragma once +#include +#include +#include + +namespace NKikimr::NOlap::NDataSharing { + +class TTxProposeFromInitiator: public TExtendedTransactionBase { +private: + using TBase = TExtendedTransactionBase; + std::shared_ptr Session; + THashMap>* Sessions; +protected: + virtual bool DoExecute(NTabletFlatExecutor::TTransactionContext& txc, const TActorContext& ctx) override; + virtual void DoComplete(const TActorContext& ctx) override; +public: + TTxProposeFromInitiator(NColumnShard::TColumnShard* self, const std::shared_ptr& session, THashMap>& sessions, const TString& info) + : TBase(self, info) + , Session(session) + , Sessions(&sessions) { + } + + TTxType GetTxType() const override { return NColumnShard::TXTYPE_DATA_SHARING_PROPOSE_FROM_INITIATOR; } +}; + +class TTxConfirmFromInitiator: public TExtendedTransactionBase { 
+private: + using TBase = TExtendedTransactionBase; + std::shared_ptr Session; +protected: + virtual bool DoExecute(NTabletFlatExecutor::TTransactionContext& txc, const TActorContext& ctx) override; + virtual void DoComplete(const TActorContext& ctx) override; +public: + TTxConfirmFromInitiator(NColumnShard::TColumnShard* self, const std::shared_ptr& session, const TString& info) + : TBase(self, info) + , Session(session) + { + } + + TTxType GetTxType() const override { return NColumnShard::TXTYPE_DATA_SHARING_CONFIRM_FROM_INITIATOR; } +}; + + +} diff --git a/ydb/core/tx/columnshard/data_sharing/destination/transactions/ya.make b/ydb/core/tx/columnshard/data_sharing/destination/transactions/ya.make new file mode 100644 index 000000000000..db1eca3269fa --- /dev/null +++ b/ydb/core/tx/columnshard/data_sharing/destination/transactions/ya.make @@ -0,0 +1,16 @@ +LIBRARY() + +SRCS( + tx_start_from_initiator.cpp + tx_data_from_source.cpp + tx_finish_from_source.cpp + tx_finish_ack_from_initiator.cpp +) + +PEERDIR( + ydb/core/tx/tiering + ydb/core/tx/columnshard/data_sharing/protos + ydb/core/tx/columnshard/data_sharing/common/transactions +) + +END() diff --git a/ydb/core/tx/columnshard/data_sharing/destination/ya.make b/ydb/core/tx/columnshard/data_sharing/destination/ya.make new file mode 100644 index 000000000000..77349cb09cb2 --- /dev/null +++ b/ydb/core/tx/columnshard/data_sharing/destination/ya.make @@ -0,0 +1,9 @@ +LIBRARY() + +PEERDIR( + ydb/core/tx/columnshard/data_sharing/destination/session + ydb/core/tx/columnshard/data_sharing/destination/transactions + ydb/core/tx/columnshard/data_sharing/destination/events +) + +END() diff --git a/ydb/core/tx/columnshard/data_sharing/initiator/controller/abstract.cpp b/ydb/core/tx/columnshard/data_sharing/initiator/controller/abstract.cpp new file mode 100644 index 000000000000..f7eeaa175efb --- /dev/null +++ b/ydb/core/tx/columnshard/data_sharing/initiator/controller/abstract.cpp @@ -0,0 +1,4 @@ +#include "abstract.h" + 
+namespace NKikimr::NOlap::NDataSharing { +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/data_sharing/initiator/controller/abstract.h b/ydb/core/tx/columnshard/data_sharing/initiator/controller/abstract.h new file mode 100644 index 000000000000..3eb908800729 --- /dev/null +++ b/ydb/core/tx/columnshard/data_sharing/initiator/controller/abstract.h @@ -0,0 +1,75 @@ +#pragma once +#include +#include + +#include + +#include + +namespace NKikimr::NOlap::NDataSharing { + +class IInitiatorController { +protected: + virtual void DoProposeError(const TString& sessionId, const TString& message) const = 0; + virtual void DoProposeSuccess(const TString& sessionId) const = 0; + virtual void DoConfirmSuccess(const TString& sessionId) const = 0; + virtual void DoFinished(const TString& sessionId) const = 0; + virtual void DoStatus(const TStatusContainer& status) const = 0; + virtual TConclusionStatus DoDeserializeFromProto(const NKikimrColumnShardDataSharingProto::TInitiator::TController& proto) = 0; + virtual void DoSerializeToProto(NKikimrColumnShardDataSharingProto::TInitiator::TController& proto) const = 0; +public: + using TProto = NKikimrColumnShardDataSharingProto::TInitiator::TController; + using TFactory = NObjectFactory::TObjectFactory; + + virtual ~IInitiatorController() = default; + + void SerializeToProto(NKikimrColumnShardDataSharingProto::TInitiator::TController& proto) const { + return DoSerializeToProto(proto); + } + + TConclusionStatus DeserializeFromProto(const NKikimrColumnShardDataSharingProto::TInitiator::TController& proto) { + return DoDeserializeFromProto(proto); + } + + void Status(const TStatusContainer& status) const { + DoStatus(status); + } + void ProposeError(const TString& sessionId, const TString& message) const { + DoProposeError(sessionId, message); + } + void ConfirmSuccess(const TString& sessionId) const { + DoConfirmSuccess(sessionId); + } + void ProposeSuccess(const TString& sessionId) const { + 
DoProposeSuccess(sessionId); + } + void Finished(const TString& sessionId) const { + DoFinished(sessionId); + } + virtual TString GetClassName() const = 0; +}; + +class TInitiatorControllerContainer: public NBackgroundTasks::TInterfaceProtoContainer { +private: + using TBase = NBackgroundTasks::TInterfaceProtoContainer; +public: + using TBase::TBase; + + void Status(const TStatusContainer& status) const { + TBase::GetObjectPtrVerified()->Status(status); + } + void ProposeSuccess(const TString& sessionId) const { + TBase::GetObjectPtrVerified()->ProposeSuccess(sessionId); + } + void ConfirmSuccess(const TString& sessionId) const { + TBase::GetObjectPtrVerified()->ConfirmSuccess(sessionId); + } + void ProposeError(const TString& sessionId, const TString& message) const { + TBase::GetObjectPtrVerified()->ProposeError(sessionId, message); + } + void Finished(const TString& sessionId) const { + TBase::GetObjectPtrVerified()->Finished(sessionId); + } +}; + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/data_sharing/initiator/controller/test.cpp b/ydb/core/tx/columnshard/data_sharing/initiator/controller/test.cpp new file mode 100644 index 000000000000..162ef65ff2c5 --- /dev/null +++ b/ydb/core/tx/columnshard/data_sharing/initiator/controller/test.cpp @@ -0,0 +1,4 @@ +#include "test.h" + +namespace NKikimr::NOlap::NDataSharing { +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/data_sharing/initiator/controller/test.h b/ydb/core/tx/columnshard/data_sharing/initiator/controller/test.h new file mode 100644 index 000000000000..95645fdba873 --- /dev/null +++ b/ydb/core/tx/columnshard/data_sharing/initiator/controller/test.h @@ -0,0 +1,37 @@ +#pragma once +#include "abstract.h" + +namespace NKikimr::NOlap::NDataSharing { + +class TTestInitiatorController: public IInitiatorController { +public: + static TString GetClassNameStatic() { + return "TEST"; + } +private: + static inline TFactory::TRegistrator Registrator = 
TFactory::TRegistrator(GetClassNameStatic()); +protected: + virtual TConclusionStatus DoDeserializeFromProto(const NKikimrColumnShardDataSharingProto::TInitiator::TController& /*proto*/) override { + return TConclusionStatus::Success(); + } + virtual void DoSerializeToProto(NKikimrColumnShardDataSharingProto::TInitiator::TController& /*proto*/) const override { + + } + + virtual void DoStatus(const TStatusContainer& /*status*/) const override { + + } + virtual void DoProposeError(const TString& /*sessionId*/, const TString& /*message*/) const override { + } + virtual void DoProposeSuccess(const TString& /*sessionId*/) const override { + } + virtual void DoConfirmSuccess(const TString& /*sessionId*/) const override { + } + virtual void DoFinished(const TString& /*sessionId*/) const override { + } + virtual TString GetClassName() const override { + return GetClassNameStatic(); + } +}; + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/data_sharing/initiator/controller/ya.make b/ydb/core/tx/columnshard/data_sharing/initiator/controller/ya.make new file mode 100644 index 000000000000..9f2dce70dbd7 --- /dev/null +++ b/ydb/core/tx/columnshard/data_sharing/initiator/controller/ya.make @@ -0,0 +1,13 @@ +LIBRARY() + +SRCS( + abstract.cpp + GLOBAL test.cpp +) + +PEERDIR( + ydb/core/tx/columnshard/data_sharing/initiator/status + ydb/services/bg_tasks/abstract +) + +END() diff --git a/ydb/core/tx/columnshard/data_sharing/initiator/status/abstract.cpp b/ydb/core/tx/columnshard/data_sharing/initiator/status/abstract.cpp new file mode 100644 index 000000000000..fc8a83aa36d3 --- /dev/null +++ b/ydb/core/tx/columnshard/data_sharing/initiator/status/abstract.cpp @@ -0,0 +1,5 @@ +#include "abstract.h" + +namespace NKikimr::NOlap::NDataSharing { + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/data_sharing/initiator/status/abstract.h b/ydb/core/tx/columnshard/data_sharing/initiator/status/abstract.h new file mode 100644 index 
000000000000..d5c8eb1c0b88 --- /dev/null +++ b/ydb/core/tx/columnshard/data_sharing/initiator/status/abstract.h @@ -0,0 +1,70 @@ +#pragma once +#include +#include + +#include +#include + +#include + +namespace NKikimr::NOlap::NDataSharing { + +enum class EStatus { + Undefined, + NotFound, + StartFailed, + InProgress +}; + +class IStatus { +private: + YDB_READONLY(EStatus, Status, EStatus::Undefined); + YDB_READONLY_DEF(TString, SessionId); +protected: + virtual NJson::TJsonValue DoDebugJson() const { + return NJson::JSON_NULL; + } + virtual TConclusionStatus DoDeserializeFromProto(const NKikimrColumnShardDataSharingProto::TInitiator::TStatus& proto) = 0; + virtual void DoSerializeFromProto(NKikimrColumnShardDataSharingProto::TInitiator::TStatus& proto) const = 0; +public: + using TProto = NKikimrColumnShardDataSharingProto::TInitiator::TStatus; + using TFactory = NObjectFactory::TObjectFactory; + IStatus(const EStatus status, const TString& sessionId) + : Status(status) + , SessionId(sessionId) + { + AFL_VERIFY(SessionId); + } + + TConclusionStatus DeserializeFromProto(const NKikimrColumnShardDataSharingProto::TInitiator::TStatus& proto) { + if (!TryFromString(proto.GetClassName(), Status)) { + return TConclusionStatus::Fail("cannot parse class name as status: " + proto.GetClassName()); + } + SessionId = proto.GetSessionId(); + return DoDeserializeFromProto(proto); + } + void SerializeToProto(NKikimrColumnShardDataSharingProto::TInitiator::TStatus& proto) const { + *proto.MutableSessionId() = SessionId; + return DoSerializeFromProto(proto); + } + + TString GetClassName() const { + return ::ToString(Status); + } + + NJson::TJsonValue DebugJson() const { + NJson::TJsonValue result = NJson::JSON_MAP; + result.InsertValue("class_name", GetClassName()); + result.InsertValue("session_id", SessionId); + auto detailsJson = DoDebugJson(); + if (!detailsJson.IsNull()) { + result.InsertValue("details", std::move(detailsJson)); + } + return result; + } +}; + +class 
TStatusContainer: public NBackgroundTasks::TInterfaceProtoContainer { + +}; +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/data_sharing/initiator/status/ya.make b/ydb/core/tx/columnshard/data_sharing/initiator/status/ya.make new file mode 100644 index 000000000000..743bfaf5d3e7 --- /dev/null +++ b/ydb/core/tx/columnshard/data_sharing/initiator/status/ya.make @@ -0,0 +1,14 @@ +LIBRARY() + +SRCS( + abstract.cpp +) + +PEERDIR( + ydb/core/tx/columnshard/data_sharing/protos + ydb/services/bg_tasks/abstract +) + +GENERATE_ENUM_SERIALIZATION(abstract.h) + +END() diff --git a/ydb/core/tx/columnshard/data_sharing/initiator/ya.make b/ydb/core/tx/columnshard/data_sharing/initiator/ya.make new file mode 100644 index 000000000000..f5cbef5a1b75 --- /dev/null +++ b/ydb/core/tx/columnshard/data_sharing/initiator/ya.make @@ -0,0 +1,8 @@ +LIBRARY() + +PEERDIR( + ydb/core/tx/columnshard/data_sharing/initiator/controller + ydb/core/tx/columnshard/data_sharing/initiator/status +) + +END() diff --git a/ydb/core/tx/columnshard/data_sharing/manager/sessions.cpp b/ydb/core/tx/columnshard/data_sharing/manager/sessions.cpp new file mode 100644 index 000000000000..197fa88ac65d --- /dev/null +++ b/ydb/core/tx/columnshard/data_sharing/manager/sessions.cpp @@ -0,0 +1,114 @@ +#include "sessions.h" +#include +#include +#include +#include +#include + +namespace NKikimr::NOlap::NDataSharing { + +void TSessionsManager::Start(const NColumnShard::TColumnShard& shard) const { + NActors::TLogContextGuard logGuard = NActors::TLogContextBuilder::Build()("sessions", "start")("tablet_id", shard.TabletID()); + for (auto&& i : SourceSessions) { + if (!i.second->IsStarted()) { + i.second->Start(shard); + } + } + for (auto&& i : DestSessions) { + if (!i.second->IsStarted() && i.second->IsConfirmed()) { + i.second->Start(shard); + } + } + NYDBTest::TControllers::GetColumnShardController()->OnAfterSharingSessionsManagerStart(shard); +} + +void TSessionsManager::InitializeEventsExchange(const 
NColumnShard::TColumnShard& shard, const std::optional sessionCookie) { + AFL_VERIFY(!sessionCookie || *sessionCookie); + for (auto&& i : SourceSessions) { + if (sessionCookie && *sessionCookie != i.second->GetRuntimeId()) { + continue; + } + i.second->ActualizeDestination(shard.GetDataLocksManager()); + } + for (auto&& i : DestSessions) { + if (sessionCookie && *sessionCookie != i.second->GetRuntimeId()) { + continue; + } + i.second->SendCurrentCursorAck(shard, {}); + } +} + +bool TSessionsManager::Load(NTable::TDatabase& database, const TColumnEngineForLogs* index) { + NIceDb::TNiceDb db(database); + using namespace NColumnShard; + { + auto rowset = db.Table().Select(); + if (!rowset.IsReady()) { + return false; + } + + while (!rowset.EndOfSet()) { + auto session = std::make_shared((TTabletId)index->GetTabletId()); + + NKikimrColumnShardDataSharingProto::TSourceSession protoSession; + AFL_VERIFY(protoSession.ParseFromString(rowset.GetValue())); + + NKikimrColumnShardDataSharingProto::TSourceSession::TCursorDynamic protoSessionCursorDynamic; + AFL_VERIFY(protoSessionCursorDynamic.ParseFromString(rowset.GetValue())); + + NKikimrColumnShardDataSharingProto::TSourceSession::TCursorStatic protoSessionCursorStatic; + AFL_VERIFY(protoSessionCursorStatic.ParseFromString(rowset.GetValue())); + + AFL_VERIFY(index); + session->DeserializeFromProto(protoSession, protoSessionCursorDynamic, protoSessionCursorStatic).Validate(); + AFL_VERIFY(SourceSessions.emplace(session->GetSessionId(), session).second); + if (!rowset.Next()) { + return false; + } + } + + } + + { + auto rowset = db.Table().Select(); + if (!rowset.IsReady()) { + return false; + } + + while (!rowset.EndOfSet()) { + auto session = std::make_shared(); + + NKikimrColumnShardDataSharingProto::TDestinationSession protoSession; + AFL_VERIFY(protoSession.ParseFromString(rowset.GetValue())); + + NKikimrColumnShardDataSharingProto::TDestinationSession::TFullCursor protoSessionCursor; + 
AFL_VERIFY(protoSessionCursor.ParseFromString(rowset.GetValue())); + + AFL_VERIFY(index); + session->DeserializeDataFromProto(protoSession, *index).Validate(); + session->DeserializeCursorFromProto(protoSessionCursor).Validate(); + AFL_VERIFY(DestSessions.emplace(session->GetSessionId(), session).second); + if (!rowset.Next()) { + return false; + } + } + } + return true; +} + +std::unique_ptr TSessionsManager::ProposeDestSession(NColumnShard::TColumnShard* self, const std::shared_ptr& session) { + AFL_VERIFY(session); + return std::make_unique(self, session, DestSessions, "tx_propose_from_initiator"); +} + +std::unique_ptr TSessionsManager::ConfirmDestSession(NColumnShard::TColumnShard* self, const std::shared_ptr& session) { + AFL_VERIFY(session); + return std::make_unique(self, session, "tx_confirm_from_initiator"); +} + +std::unique_ptr TSessionsManager::InitializeSourceSession(NColumnShard::TColumnShard* self, const std::shared_ptr& session) { + AFL_VERIFY(session); + return std::make_unique(self, session, SourceSessions, "tx_start_to_source"); +} + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/data_sharing/manager/sessions.h b/ydb/core/tx/columnshard/data_sharing/manager/sessions.h new file mode 100644 index 000000000000..691b42ad8bb4 --- /dev/null +++ b/ydb/core/tx/columnshard/data_sharing/manager/sessions.h @@ -0,0 +1,54 @@ +#pragma once +#include +#include + +namespace NKikimr::NColumnShard { +class TColumnShard; +} + +namespace NKikimr::NOlap::NDataSharing { + +class TSessionsManager { +private: + THashMap> SourceSessions; + THashMap> DestSessions; +public: + TSessionsManager() = default; + + void Start(const NColumnShard::TColumnShard& shard) const; + + std::shared_ptr GetSourceSession(const TString& sessionId) const { + auto it = SourceSessions.find(sessionId); + if (it == SourceSessions.end()) { + return nullptr; + } + return it->second; + } + + std::shared_ptr GetDestinationSession(const TString& sessionId) const { + auto it = 
DestSessions.find(sessionId); + if (it == DestSessions.end()) { + return nullptr; + } + return it->second; + } + + void RemoveSourceSession(const TString& sessionId) { + SourceSessions.erase(sessionId); + } + + void RemoveDestinationSession(const TString& sessionId) { + DestSessions.erase(sessionId); + } + + [[nodiscard]] bool Load(NTable::TDatabase& database, const TColumnEngineForLogs* index); + + void InitializeEventsExchange(const NColumnShard::TColumnShard& shard, const std::optional sessionCookie = {}); + + std::unique_ptr InitializeSourceSession(NColumnShard::TColumnShard* self, const std::shared_ptr& session); + std::unique_ptr ProposeDestSession(NColumnShard::TColumnShard* self, const std::shared_ptr& session); + std::unique_ptr ConfirmDestSession(NColumnShard::TColumnShard* self, const std::shared_ptr& session); + +}; + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/data_sharing/manager/shared_blobs.cpp b/ydb/core/tx/columnshard/data_sharing/manager/shared_blobs.cpp new file mode 100644 index 000000000000..40387937a93b --- /dev/null +++ b/ydb/core/tx/columnshard/data_sharing/manager/shared_blobs.cpp @@ -0,0 +1,127 @@ +#include "shared_blobs.h" +#include +#include +#include + +namespace NKikimr::NOlap::NDataSharing { + +bool TSharedBlobsManager::LoadIdempotency(NTable::TDatabase& database) { + NIceDb::TNiceDb db(database); + using namespace NKikimr::NColumnShard; + THashMap>> sharedBlobIds; + THashMap> borrowedBlobIds; + { + auto rowset = db.Table().Select(); + if (!rowset.IsReady()) + return false; + + TString error; + while (!rowset.EndOfSet()) { + const TString& storageId = rowset.GetValue(); + auto unifiedBlobId = NOlap::TUnifiedBlobId::BuildFromString(rowset.GetValue(), nullptr); + AFL_VERIFY(!!unifiedBlobId)("error", unifiedBlobId.GetErrorMessage()); + AFL_VERIFY(sharedBlobIds[storageId][*unifiedBlobId].emplace((TTabletId)rowset.GetValue()).second)("blob_id", *unifiedBlobId)("storage_id", storageId); + if (!rowset.Next()) + 
return false; + } + } + + { + auto rowset = db.Table().Select(); + if (!rowset.IsReady()) + return false; + + TString error; + + while (!rowset.EndOfSet()) { + const TString& storageId = rowset.GetValue(); + auto unifiedBlobId = NOlap::TUnifiedBlobId::BuildFromString(rowset.GetValue(), nullptr); + AFL_VERIFY(!!unifiedBlobId)("error", unifiedBlobId.GetErrorMessage()); + AFL_VERIFY(borrowedBlobIds[storageId].emplace(*unifiedBlobId, (TTabletId)rowset.GetValue()).second)("blob_id", *unifiedBlobId)("storage_id", storageId); + if (!rowset.Next()) + return false; + } + } + for (auto&& i : Storages) { + i.second->Clear(); + } + for (auto&& [storageId, blobs] : sharedBlobIds) { + auto storage = GetStorageManagerGuarantee(storageId); + for (auto&& b : blobs) { + for (auto&& t : b.second) { + AFL_VERIFY(storage->UpsertSharedBlobOnLoad(b.first, t)); + } + } + } + for (auto&& [storageId, blobs] : borrowedBlobIds) { + auto storage = GetStorageManagerGuarantee(storageId); + for (auto&& b : blobs) { + AFL_VERIFY(storage->UpsertBorrowedBlobOnLoad(b.first, b.second)); + } + } + return true; +} + +void TStorageSharedBlobsManager::RemoveSharedBlobsDB(NTabletFlatExecutor::TTransactionContext& txc, const TTabletsByBlob& blobIds) { + NIceDb::TNiceDb db(txc.DB); + for (auto i = blobIds.GetIterator(); i.IsValid(); ++i) { + db.Table().Key(StorageId, i.GetBlobId().ToStringNew(), (ui64)i.GetTabletId()).Delete(); + } +} + +void TStorageSharedBlobsManager::WriteSharedBlobsDB(NTabletFlatExecutor::TTransactionContext& txc, const TTabletsByBlob& blobIds) { + NIceDb::TNiceDb db(txc.DB); + for (auto i = blobIds.GetIterator(); i.IsValid(); ++i) { + db.Table().Key(StorageId, i.GetBlobId().ToStringNew(), (ui64)i.GetTabletId()).Update(); + } +} + +void TStorageSharedBlobsManager::WriteBorrowedBlobsDB(NTabletFlatExecutor::TTransactionContext& txc, const TTabletByBlob& blobIds) { + NIceDb::TNiceDb db(txc.DB); + for (auto&& it: blobIds) { + db.Table().Key(StorageId, 
it.first.ToStringNew()).Update(NIceDb::TUpdate((ui64)it.second)); + } +} + +void TStorageSharedBlobsManager::CASBorrowedBlobsDB(NTabletFlatExecutor::TTransactionContext& txc, const TTabletId tabletIdFrom, const TTabletId tabletIdTo, const THashSet& blobIds) { + NIceDb::TNiceDb db(txc.DB); + for (auto&& i : blobIds) { + auto it = BorrowedBlobIds.find(i); + AFL_VERIFY(it != BorrowedBlobIds.end())("blob_id", i.ToStringNew()); + AFL_VERIFY(it->second == tabletIdFrom || it->second == tabletIdTo); + if (tabletIdTo == SelfTabletId) { + db.Table().Key(StorageId, i.ToStringNew()).Delete(); + } else { + db.Table().Key(StorageId, i.ToStringNew()).Update(NIceDb::TUpdate((ui64)tabletIdTo)); + } + } +} + +void TStorageSharedBlobsManager::OnTransactionExecuteAfterCleaning(const TBlobsCategories& removeTask, NTable::TDatabase& db) { + TBlobManagerDb dbBlobs(db); + for (auto&& i : removeTask.GetSharing()) { + for (auto&& b : i.second) { + dbBlobs.RemoveBlobSharing(StorageId, b, i.first); + } + } + for (auto&& i : removeTask.GetBorrowed()) { + for (auto&& blob : i.second) { + dbBlobs.RemoveBorrowedBlob(StorageId, blob); + } + } +} + +void TStorageSharedBlobsManager::OnTransactionCompleteAfterCleaning(const TBlobsCategories& removeTask) { + for (auto i = removeTask.GetSharing().GetIterator(); i.IsValid(); ++i) { + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("action", "remove_share")("tablet_id_share", i.GetTabletId())("blob_id", i.GetBlobId().ToStringNew()); + SharedBlobIds.Remove(i.GetTabletId(), i.GetBlobId()); + } + for (auto i = removeTask.GetBorrowed().GetIterator(); i.IsValid(); ++i) { + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("action", "remove_own")("tablet_id_own", i.GetTabletId())("blob_id", i.GetBlobId().ToStringNew()); + auto it = BorrowedBlobIds.find(i.GetBlobId()); + AFL_VERIFY(it != BorrowedBlobIds.end()); + BorrowedBlobIds.erase(it); + } +} + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/data_sharing/manager/shared_blobs.h 
b/ydb/core/tx/columnshard/data_sharing/manager/shared_blobs.h new file mode 100644 index 000000000000..c9cacb603aeb --- /dev/null +++ b/ydb/core/tx/columnshard/data_sharing/manager/shared_blobs.h @@ -0,0 +1,247 @@ +#pragma once +#include +#include +#include +#include +#include + +#include + +namespace NKikimr::NOlap::NDataSharing { + +class TStorageSharedBlobsManager { +private: + const TString StorageId; + const TTabletId SelfTabletId; + THashMap BorrowedBlobIds; // blobId -> owned by tabletId + TTabletsByBlob SharedBlobIds; // blobId -> shared with tabletIds + + bool CheckRemoveBlobId(const TTabletId tabletId, const TUnifiedBlobId& blobId, TBlobsCategories& blobs) const { + const THashSet* shared = SharedBlobIds.Find(blobId); + bool doRemove = false; + if (shared) { + auto itTablet = shared->find(tabletId); + AFL_VERIFY(itTablet != shared->end()); + if (shared->size() == 1) { + doRemove = true; + } + blobs.AddSharing(tabletId, blobId); + } else { + doRemove = true; + } + if (doRemove) { + auto it = BorrowedBlobIds.find(blobId); + if (it != BorrowedBlobIds.end()) { + AFL_VERIFY(it->second != tabletId); + blobs.AddBorrowed(it->second, blobId); + } else { + blobs.AddDirect(tabletId, blobId); + } + } + return doRemove; + } +public: + TStorageSharedBlobsManager(const TString& storageId, const TTabletId tabletId) + : StorageId(storageId) + , SelfTabletId(tabletId) + { + + } + + bool IsTrivialLinks() const { + return BorrowedBlobIds.empty() && SharedBlobIds.IsEmpty(); + } + TTabletId GetSelfTabletId() const { + return SelfTabletId; + } + + TBlobsCategories GetBlobCategories() const { + TBlobsCategories result(SelfTabletId); + for (auto&& i : BorrowedBlobIds) { + result.AddBorrowed(i.second, i.first); + } + for (auto i = SharedBlobIds.GetIterator(); i.IsValid(); ++i) { + result.AddSharing(i.GetTabletId(), i.GetBlobId()); + } + return result; + } + + TBlobsCategories BuildRemoveCategories(TTabletsByBlob&& blobs) const { + TBlobsCategories result(SelfTabletId); + for (auto 
it = blobs.GetIterator(); it.IsValid(); ++it) { + CheckRemoveBlobId(it.GetTabletId(), it.GetBlobId(), result); + } + return result; + } + + TBlobsCategories BuildStoreCategories(const THashSet& blobIds) const { + TBlobsCategories result(SelfTabletId); + for (auto&& i : blobIds) { + auto* tabletIds = SharedBlobIds.Find(i); + auto it = BorrowedBlobIds.find(i); + bool borrowed = false; + bool direct = false; + bool shared = false; + if (it != BorrowedBlobIds.end()) { + result.AddBorrowed(it->second, i); + borrowed = true; + } else if (!tabletIds) { + result.AddDirect(SelfTabletId, i); + direct = true; + } + if (tabletIds) { + for (auto&& t : *tabletIds) { + result.AddSharing(t, i); + shared = true; + } + } + AFL_VERIFY((borrowed ? 1 : 0) + (direct ? 1 : 0) + (shared ? 1 : 0) == 1)("b", borrowed)("d", direct)("s", shared); + } + return result; + } + + void RemoveSharedBlobsDB(NTabletFlatExecutor::TTransactionContext& txc, const TTabletsByBlob& blobIds); + + void RemoveSharedBlobs(const TTabletsByBlob& blobIds) { + for (auto i = blobIds.GetIterator(); i.IsValid(); ++i) { + AFL_VERIFY(SharedBlobIds.Remove(i.GetTabletId(), i.GetBlobId())); + } + } + + void WriteSharedBlobsDB(NTabletFlatExecutor::TTransactionContext& txc, const TTabletsByBlob& blobIds); + + [[nodiscard]] bool AddSharedBlobs(const TTabletsByBlob& blobIds) { + bool result = true; + for (auto i = blobIds.GetIterator(); i.IsValid(); ++i) { + if (!SharedBlobIds.Add(i.GetTabletId(), i.GetBlobId())) { + result = false; + } + } + return result; + } + + void WriteBorrowedBlobsDB(NTabletFlatExecutor::TTransactionContext& txc, const TTabletByBlob& blobIds); + + void AddBorrowedBlobs(const TTabletByBlob& blobIds) { + for (auto&& i : blobIds) { + auto infoInsert = BorrowedBlobIds.emplace(i.first, i.second); + if (!infoInsert.second) { + AFL_VERIFY(infoInsert.first->second == i.second)("before", infoInsert.first->second)("after", i.second); + } + } + } + + void 
CASBorrowedBlobsDB(NTabletFlatExecutor::TTransactionContext& txc, const TTabletId tabletIdFrom, const TTabletId tabletIdTo, const THashSet& blobIds); + + void CASBorrowedBlobs(const TTabletId tabletIdFrom, const TTabletId tabletIdTo, const THashSet& blobIds) { + for (auto&& i : blobIds) { + auto it = BorrowedBlobIds.find(i); + AFL_VERIFY(it != BorrowedBlobIds.end()); + AFL_VERIFY(it->second == tabletIdFrom || it->second == tabletIdTo); + if (it->second == SelfTabletId) { + BorrowedBlobIds.erase(it); + } else { + it->second = tabletIdTo; + } + } + } + + [[nodiscard]] bool UpsertSharedBlobOnLoad(const TUnifiedBlobId& blobId, const TTabletId tabletId) { + return SharedBlobIds.Add(tabletId, blobId); + } + + [[nodiscard]] bool UpsertBorrowedBlobOnLoad(const TUnifiedBlobId& blobId, const TTabletId ownerTabletId) { + return BorrowedBlobIds.emplace(blobId, ownerTabletId).second; + } + + void Clear() { + SharedBlobIds.Clear(); + BorrowedBlobIds.clear(); + } + + void OnTransactionExecuteAfterCleaning(const TBlobsCategories& removeTask, NTable::TDatabase& db); + void OnTransactionCompleteAfterCleaning(const TBlobsCategories& removeTask); +}; + +class TSharedBlobsManager { +private: + const TTabletId SelfTabletId; + THashMap> Storages; +public: + TSharedBlobsManager(const TTabletId tabletId) + : SelfTabletId(tabletId) + { + + } + + bool IsTrivialLinks() const { + for (auto&& i : Storages) { + if (!i.second->IsTrivialLinks()) { + return false; + } + } + return true; + } + + THashMap GetBlobCategories() const { + THashMap result; + for (auto&& i : Storages) { + result.emplace(i.first, i.second->GetBlobCategories()); + } + return result; + } + + TTabletId GetSelfTabletId() const { + return SelfTabletId; + } + + void WriteSharedBlobsDB(NTabletFlatExecutor::TTransactionContext& txc, const THashMap& blobIds) { + for (auto&& i : blobIds) { + GetStorageManagerGuarantee(i.first)->WriteSharedBlobsDB(txc, i.second); + } + } + + void AddSharingBlobs(const THashMap& blobIds) { + for 
(auto&& i : blobIds) { + Y_UNUSED(GetStorageManagerGuarantee(i.first)->AddSharedBlobs(i.second)); + } + } + + void WriteBorrowedBlobsDB(NTabletFlatExecutor::TTransactionContext& txc, const THashMap& blobIds) { + for (auto&& i : blobIds) { + GetStorageManagerGuarantee(i.first)->WriteBorrowedBlobsDB(txc, i.second); + } + } + + void AddBorrowedBlobs(const THashMap& blobIds) { + for (auto&& i : blobIds) { + GetStorageManagerGuarantee(i.first)->AddBorrowedBlobs(i.second); + } + } + + std::shared_ptr GetStorageManagerOptional(const TString& storageId) const { + auto it = Storages.find(storageId); + if (it == Storages.end()) { + return nullptr; + } + return it->second; + } + + std::shared_ptr GetStorageManagerVerified(const TString& storageId) const { + auto it = Storages.find(storageId); + AFL_VERIFY(it != Storages.end())("storage_id", storageId); + return it->second; + } + + std::shared_ptr GetStorageManagerGuarantee(const TString& storageId) { + auto it = Storages.find(storageId); + if (it == Storages.end()) { + it = Storages.emplace(storageId, std::make_shared(storageId, SelfTabletId)).first; + } + return it->second; + } + + bool LoadIdempotency(NTable::TDatabase& database); +}; + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/data_sharing/manager/ya.make b/ydb/core/tx/columnshard/data_sharing/manager/ya.make new file mode 100644 index 000000000000..e26356c94a55 --- /dev/null +++ b/ydb/core/tx/columnshard/data_sharing/manager/ya.make @@ -0,0 +1,13 @@ +LIBRARY() + +SRCS( + sessions.cpp + shared_blobs.cpp +) + +PEERDIR( + ydb/core/tx/columnshard/data_sharing/source/session + ydb/core/tx/columnshard/data_sharing/destination/session +) + +END() diff --git a/ydb/core/tx/columnshard/data_sharing/modification/events/change_owning.cpp b/ydb/core/tx/columnshard/data_sharing/modification/events/change_owning.cpp new file mode 100644 index 000000000000..849fa635eac3 --- /dev/null +++ 
b/ydb/core/tx/columnshard/data_sharing/modification/events/change_owning.cpp @@ -0,0 +1,13 @@ +#include "change_owning.h" +#include + +namespace NKikimr::NOlap::NDataSharing::NEvents { + +TEvApplyLinksModification::TEvApplyLinksModification(const TTabletId initiatorTabletId, const TString& sessionId, const ui64 packIdx, const TTaskForTablet& task) { + Record.SetInitiatorTabletId((ui64)initiatorTabletId); + Record.SetSessionId(sessionId); + Record.SetPackIdx(packIdx); + *Record.MutableTask() = task.SerializeToProto(); +} + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/data_sharing/modification/events/change_owning.h b/ydb/core/tx/columnshard/data_sharing/modification/events/change_owning.h new file mode 100644 index 000000000000..5ed6f02d62d4 --- /dev/null +++ b/ydb/core/tx/columnshard/data_sharing/modification/events/change_owning.h @@ -0,0 +1,30 @@ +#pragma once +#include +#include +#include +#include +#include + +namespace NKikimr::NOlap::NDataSharing { +class TTaskForTablet; +} + +namespace NKikimr::NOlap::NDataSharing::NEvents { + +struct TEvApplyLinksModification: public NActors::TEventPB { + TEvApplyLinksModification() = default; + + TEvApplyLinksModification(const TTabletId initiatorTabletId, const TString& sessionId, const ui64 packIdx, const TTaskForTablet& task); +}; + +struct TEvApplyLinksModificationFinished: public NActors::TEventPB { + TEvApplyLinksModificationFinished() = default; + TEvApplyLinksModificationFinished(const TTabletId modifiedTabletId, const TString& sessionId, const ui64 packIdx) { + Record.SetSessionId(sessionId); + Record.SetModifiedTabletId((ui64)modifiedTabletId); + Record.SetPackIdx(packIdx); + } +}; + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/data_sharing/modification/events/ya.make b/ydb/core/tx/columnshard/data_sharing/modification/events/ya.make new file mode 100644 index 000000000000..0b800c5b7d67 --- /dev/null +++ 
b/ydb/core/tx/columnshard/data_sharing/modification/events/ya.make @@ -0,0 +1,14 @@ +LIBRARY() + +SRCS( + change_owning.cpp +) + +PEERDIR( + ydb/core/tx/columnshard/blobs_action/protos + ydb/core/tx/columnshard/data_sharing/protos + ydb/library/actors/core + ydb/core/tx/datashard +) + +END() diff --git a/ydb/core/tx/columnshard/data_sharing/modification/tasks/modification.cpp b/ydb/core/tx/columnshard/data_sharing/modification/tasks/modification.cpp new file mode 100644 index 000000000000..3633406093f1 --- /dev/null +++ b/ydb/core/tx/columnshard/data_sharing/modification/tasks/modification.cpp @@ -0,0 +1,43 @@ +#include "modification.h" +#include +#include + +namespace NKikimr::NOlap::NDataSharing { + +NKikimr::TConclusion> TTaskForTablet::BuildModificationTransaction(NColumnShard::TColumnShard* self, const TTabletId initiator, const TString& sessionId, const ui64 packIdx, const std::shared_ptr& selfPtr) { + return std::unique_ptr(new TTxApplyLinksModification(self, selfPtr, sessionId, initiator, packIdx)); +} + +void TTaskForTablet::ApplyForDB(NTabletFlatExecutor::TTransactionContext& txc, const std::shared_ptr& manager) const { + for (auto&& i : TasksByStorage) { + auto storageManager = manager->GetStorageManagerVerified(i.first); + i.second.ApplyForDB(txc, storageManager); + } +} + +void TTaskForTablet::ApplyForRuntime(const std::shared_ptr& manager) const { + for (auto&& i : TasksByStorage) { + auto storageManager = manager->GetStorageManagerVerified(i.first); + i.second.ApplyForRuntime(storageManager); + } +} + +void TStorageTabletTask::ApplyForDB(NTabletFlatExecutor::TTransactionContext& txc, const std::shared_ptr& manager) const { + for (auto&& i : RemapOwner) { + manager->CASBorrowedBlobsDB(txc, i.second.GetFrom(), i.second.GetTo(), {i.first}); + } + manager->WriteBorrowedBlobsDB(txc, InitOwner); + manager->WriteSharedBlobsDB(txc, AddSharingLinks); + manager->RemoveSharedBlobsDB(txc, RemoveSharingLinks); +} + +void TStorageTabletTask::ApplyForRuntime(const 
std::shared_ptr& manager) const { + for (auto&& i : RemapOwner) { + manager->CASBorrowedBlobs(i.second.GetFrom(), i.second.GetTo(), {i.first}); + } + manager->AddBorrowedBlobs(InitOwner); + Y_UNUSED(manager->AddSharedBlobs(AddSharingLinks)); + manager->RemoveSharedBlobs(RemoveSharingLinks); +} + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/data_sharing/modification/tasks/modification.h b/ydb/core/tx/columnshard/data_sharing/modification/tasks/modification.h new file mode 100644 index 000000000000..4f5de2c0b38b --- /dev/null +++ b/ydb/core/tx/columnshard/data_sharing/modification/tasks/modification.h @@ -0,0 +1,364 @@ +#pragma once +#include + +#include +#include +#include +#include +#include +#include + +namespace NKikimr::NColumnShard { +class TColumnShard; +} + +namespace NKikimr::NOlap { +class TColumnEngineForLogs; +} + +namespace NKikimr::NOlap::NDataSharing { + +class TStorageSharedBlobsManager; +class TSharedBlobsManager; + +namespace NEvents { +class TPathIdData; +} + +class TBlobOwnerRemap { +private: + YDB_READONLY_DEF(TTabletId, From); + YDB_READONLY_DEF(TTabletId, To); +public: + TBlobOwnerRemap(const TTabletId from, const TTabletId to) + : From(from) + , To(to) { + + } + + bool operator==(const TBlobOwnerRemap& item) const { + return From == item.From && To == item.To; + } +}; + +class TStorageTabletTask { +private: + TTabletId TabletId; + TString StorageId; + THashMap RemapOwner; + TTabletByBlob InitOwner; + TTabletsByBlob AddSharingLinks; + TTabletsByBlob RemoveSharingLinks; +public: + TStorageTabletTask(const TString& storageId, const TTabletId tabletId) + : TabletId(tabletId) + , StorageId(storageId) { + + } + + NKikimrColumnShardDataSharingProto::TStorageTabletTask SerializeToProto() const { + NKikimrColumnShardDataSharingProto::TStorageTabletTask result; + result.SetTabletId((ui64)TabletId); + result.SetStorageId(StorageId); + *result.MutableInitOwner() = InitOwner.SerializeToProto(); + *result.MutableAddSharingLinks() = 
AddSharingLinks.SerializeToProto(); + *result.MutableRemoveSharingLinks() = RemoveSharingLinks.SerializeToProto(); + + for (auto&& i : RemapOwner) { + auto* remapProto = result.AddRemapOwner(); + remapProto->SetBlobId(i.first.ToStringNew()); + remapProto->SetFrom((ui64)i.second.GetFrom()); + remapProto->SetTo((ui64)i.second.GetTo()); + } + + return result; + } + + TConclusionStatus DeserializeFromProto(const NKikimrColumnShardDataSharingProto::TStorageTabletTask& proto) { + StorageId = proto.GetStorageId(); + if (!StorageId) { + return TConclusionStatus::Fail("empty storage id"); + } + TabletId = (TTabletId)proto.GetTabletId(); + if (!(ui64)TabletId) { + return TConclusionStatus::Fail("empty tablet id for storage task"); + } + { + auto parse = InitOwner.DeserializeFromProto(proto.GetInitOwner()); + if (!parse) { + return parse; + } + } + { + auto parse = AddSharingLinks.DeserializeFromProto(proto.GetAddSharingLinks()); + if (!parse) { + return parse; + } + } + { + auto parse = RemoveSharingLinks.DeserializeFromProto(proto.GetRemoveSharingLinks()); + if (!parse) { + return parse; + } + } + for (auto&& i : proto.GetRemapOwner()) { + auto parse = TUnifiedBlobId::BuildFromString(i.GetBlobId(), nullptr); + if (!parse) { + return parse; + } + RemapOwner.emplace(*parse, TBlobOwnerRemap((TTabletId)i.GetFrom(), (TTabletId)i.GetTo())); + } + return TConclusionStatus::Success(); + } + + void ApplyForDB(NTabletFlatExecutor::TTransactionContext& txc, const std::shared_ptr& manager) const; + + void ApplyForRuntime(const std::shared_ptr& manager) const; + + const TString GetStorageId() const { + return StorageId; + } + + void AddRemapOwner(const TUnifiedBlobId& blobId, const TTabletId from, const TTabletId to) { +// AFL_VERIFY(to != TabletId); + AFL_VERIFY(RemapOwner.emplace(blobId, TBlobOwnerRemap(from, to)).second); + } + + void AddInitOwner(const TUnifiedBlobId& blobId, const TTabletId to) { +// AFL_VERIFY(to != TabletId); + AFL_VERIFY(InitOwner->emplace(blobId, to).second); + 
} + + void AddLink(const TUnifiedBlobId& blobId, const TTabletId tabletId) { +// AFL_VERIFY(tabletId != TabletId); + AFL_VERIFY(AddSharingLinks.Add(tabletId, blobId)); + } + + void RemoveLink(const TUnifiedBlobId& blobId, const TTabletId tabletId) { + AFL_VERIFY(RemoveSharingLinks.Add(tabletId, blobId)); + } + + void Merge(const TStorageTabletTask& from) { + AFL_VERIFY(TabletId == from.TabletId); + for (auto&& i : from.InitOwner) { + auto info = InitOwner->emplace(i.first, i.second); + if (!info.second) { + AFL_VERIFY(info.first->second == i.second); + } + } + for (auto&& i : from.RemapOwner) { + auto info = RemapOwner.emplace(i.first, i.second); + if (!info.second) { + AFL_VERIFY(info.first->second == i.second); + } + } + AddSharingLinks.Add(from.AddSharingLinks); + RemoveSharingLinks.Add(from.RemoveSharingLinks); + } +}; + +class TTaskForTablet { +private: + YDB_READONLY(TTabletId, TabletId, (TTabletId)0); + THashMap TasksByStorage; +public: + TTaskForTablet(const TTabletId tabletId) + : TabletId(tabletId) + { + } + + void Merge(const TTaskForTablet& from) { + for (auto&& i : from.TasksByStorage) { + auto it = TasksByStorage.find(i.first); + if (it == TasksByStorage.end()) { + TasksByStorage.emplace(i.first, i.second); + } else { + it->second.Merge(i.second); + } + } + } + + void ApplyForDB(NTabletFlatExecutor::TTransactionContext& txc, const std::shared_ptr& manager) const; + + void ApplyForRuntime(const std::shared_ptr& manager) const; + + void AddStorage(TStorageTabletTask&& info) { + auto storageId = info.GetStorageId(); + TasksByStorage.emplace(storageId, std::move(info)); + } + + const TStorageTabletTask* GetStorageTasksOptional(const TString& storageId) const { + auto it = TasksByStorage.find(storageId); + if (it == TasksByStorage.end()) { + return nullptr; + } + return &it->second; + } + + const TStorageTabletTask& GetStorageTasksGuarantee(const TString& storageId) { + auto it = TasksByStorage.find(storageId); + if (it == TasksByStorage.end()) { + it = 
TasksByStorage.emplace(storageId, TStorageTabletTask(storageId, TabletId)).first; + } + return it->second; + } + + NKikimrColumnShardDataSharingProto::TTaskForTablet SerializeToProto() const { + NKikimrColumnShardDataSharingProto::TTaskForTablet result; + for (auto&& i : TasksByStorage) { + *result.AddTasksByStorage() = i.second.SerializeToProto(); + } + return result; + } + + TConclusionStatus DeserializeFromProto(const NKikimrColumnShardDataSharingProto::TTaskForTablet& proto) { + for (auto&& i : proto.GetTasksByStorage()) { + TStorageTabletTask sTask("", TTabletId(0)); + auto parse = sTask.DeserializeFromProto(i); + if (!parse) { + return parse; + } + const TString storageId = sTask.GetStorageId(); + AFL_VERIFY(TasksByStorage.emplace(storageId, std::move(sTask)).second); + } + return TConclusionStatus::Success(); + } + + TConclusion> BuildModificationTransaction(NColumnShard::TColumnShard* self, const TTabletId initiator, + const TString& sessionId, const ui64 packIdx, const std::shared_ptr& selfPtr); + +}; + +class TBlobSharing { +private: + const TString StorageId; + TUnifiedBlobId BlobId; + std::optional Borrowed; + THashSet Shared; +public: + TBlobSharing(const TString& storageId, const TUnifiedBlobId& blobId) + : StorageId(storageId) + , BlobId(blobId) + { + + } + void AddShared(const TTabletId tabletId) { + AFL_VERIFY(Shared.emplace(tabletId).second); + } + void AddBorrowed(const TTabletId tabletId) { + AFL_VERIFY(!Borrowed); + Borrowed = tabletId; + } + + THashMap BuildTabletTasksOnCopy(const TTransferContext& context, const TTabletId selfTabletId, const TString& storageId) const { + auto toTabletId = context.GetDestinationTabletId(); + THashMap result; + const TTabletId ownerTabletId = Borrowed.value_or(selfTabletId); + if (Borrowed) { + AFL_VERIFY(Shared.empty()); + } + if (ownerTabletId != toTabletId) { + { + TStorageTabletTask task(storageId, ownerTabletId); + task.AddLink(BlobId, toTabletId); + task.AddLink(BlobId, selfTabletId); + 
AFL_VERIFY(result.emplace(ownerTabletId, std::move(task)).second); + } + { + TStorageTabletTask task(storageId, toTabletId); + task.AddInitOwner(BlobId, ownerTabletId); + AFL_VERIFY(result.emplace(toTabletId, std::move(task)).second); + } + } + return result; + } + + THashMap BuildTabletTasksOnMove(const TTransferContext& context, const TTabletId selfTabletId, const TString& storageId) const { + THashMap result; + auto& movedTabletId = context.GetSourceTabletIds(); + auto toTabletId = context.GetDestinationTabletId(); + if (Borrowed) { + AFL_VERIFY(Shared.empty()); + if (movedTabletId.contains(*Borrowed)) { + { + TStorageTabletTask task(storageId, toTabletId); + task.AddLink(BlobId, selfTabletId); + task.AddLink(BlobId, toTabletId); + AFL_VERIFY(result.emplace(toTabletId, std::move(task)).second); + } + { + TStorageTabletTask task(storageId, selfTabletId); + task.AddRemapOwner(BlobId, *Borrowed, toTabletId); + AFL_VERIFY(result.emplace(selfTabletId, std::move(task)).second); + } + { + TStorageTabletTask task(storageId, *Borrowed); + task.RemoveLink(BlobId, selfTabletId); + AFL_VERIFY(result.emplace(*Borrowed, std::move(task)).second); + } + } else if (toTabletId == *Borrowed) { + } else { + { + TStorageTabletTask task(storageId, *Borrowed); + task.AddLink(BlobId, toTabletId); + AFL_VERIFY(result.emplace(*Borrowed, std::move(task)).second); + } + { + TStorageTabletTask task(storageId, toTabletId); + task.AddInitOwner(BlobId, *Borrowed); + AFL_VERIFY(result.emplace(toTabletId, std::move(task)).second); + } + } + } else { + for (auto&& i : Shared) { + if (movedTabletId.contains(i) && i != selfTabletId) { + continue; + } + + if (i != selfTabletId) { + TStorageTabletTask task(StorageId, i); + task.AddRemapOwner(BlobId, selfTabletId, toTabletId); + AFL_VERIFY(result.emplace(i, std::move(task)).second); + } + + { + TStorageTabletTask task(StorageId, selfTabletId); + task.RemoveLink(BlobId, i); + auto info = result.emplace(selfTabletId, task); + if (!info.second) { + 
info.first->second.Merge(task); + } + } + + { + TStorageTabletTask task(StorageId, toTabletId); + task.AddLink(BlobId, i); + auto info = result.emplace(toTabletId, task); + if (!info.second) { + info.first->second.Merge(task); + } + } + } + { + TStorageTabletTask task(storageId, toTabletId); + task.AddLink(BlobId, selfTabletId); + task.AddLink(BlobId, toTabletId); + auto info = result.emplace(toTabletId, task); + if (!info.second) { + info.first->second.Merge(task); + } + } + { + TStorageTabletTask task(storageId, selfTabletId); + task.AddInitOwner(BlobId, toTabletId); + auto info = result.emplace(selfTabletId, task); + if (!info.second) { + info.first->second.Merge(task); + } + } + } + return result; + } +}; + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/data_sharing/modification/tasks/ya.make b/ydb/core/tx/columnshard/data_sharing/modification/tasks/ya.make new file mode 100644 index 000000000000..f42b0f0c234e --- /dev/null +++ b/ydb/core/tx/columnshard/data_sharing/modification/tasks/ya.make @@ -0,0 +1,14 @@ +LIBRARY() + +SRCS( + modification.cpp +) + +PEERDIR( + ydb/core/tx/columnshard/data_sharing/initiator/controller + ydb/core/tx/columnshard/data_sharing/common/session + ydb/core/tx/columnshard/data_sharing/common/transactions + ydb/core/tx/columnshard/data_sharing/destination/transactions +) + +END() diff --git a/ydb/core/tx/columnshard/data_sharing/modification/transactions/tx_change_blobs_owning.cpp b/ydb/core/tx/columnshard/data_sharing/modification/transactions/tx_change_blobs_owning.cpp new file mode 100644 index 000000000000..d10cac2c1e6b --- /dev/null +++ b/ydb/core/tx/columnshard/data_sharing/modification/transactions/tx_change_blobs_owning.cpp @@ -0,0 +1,22 @@ +#include "tx_change_blobs_owning.h" +#include +#include + +namespace NKikimr::NOlap::NDataSharing { + +bool TTxApplyLinksModification::DoExecute(TTransactionContext& txc, const TActorContext&) { + NActors::TLogContextGuard logGuard = 
NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("tablet_id", Self->TabletID())("tx_state", "execute"); + Task->ApplyForDB(txc, Self->GetStoragesManager()->GetSharedBlobsManager()); + return true; +} + +void TTxApplyLinksModification::DoComplete(const TActorContext& /*ctx*/) { + NActors::TLogContextGuard logGuard = NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("tablet_id", Self->TabletID())("tx_state", "complete"); + Task->ApplyForRuntime(Self->GetStoragesManager()->GetSharedBlobsManager()); + + auto ev = std::make_unique(Task->GetTabletId(), SessionId, PackIdx); + NActors::TActivationContext::AsActorContext().Send(MakePipePeNodeCacheID(false), + new TEvPipeCache::TEvForward(ev.release(), (ui64)InitiatorTabletId, true), IEventHandle::FlagTrackDelivery); +} + +} diff --git a/ydb/core/tx/columnshard/data_sharing/modification/transactions/tx_change_blobs_owning.h b/ydb/core/tx/columnshard/data_sharing/modification/transactions/tx_change_blobs_owning.h new file mode 100644 index 000000000000..d32ae4504563 --- /dev/null +++ b/ydb/core/tx/columnshard/data_sharing/modification/transactions/tx_change_blobs_owning.h @@ -0,0 +1,34 @@ +#pragma once +#include +#include +#include + +namespace NKikimr::NOlap::NDataSharing { + +class TTaskForTablet; + +class TTxApplyLinksModification: public TExtendedTransactionBase { +private: + using TBase = TExtendedTransactionBase; + std::shared_ptr Task; + const TTabletId InitiatorTabletId; + const TString SessionId; + const ui64 PackIdx; + bool DoExecute(TTransactionContext& txc, const TActorContext& ctx) override; + void DoComplete(const TActorContext& ctx) override; +public: + TTxApplyLinksModification(NColumnShard::TColumnShard* self, const std::shared_ptr& task, const TString& sessionId, const TTabletId initiatorTabletId, const ui64 packIdx) + : TBase(self) + , Task(task) + , InitiatorTabletId(initiatorTabletId) + , SessionId(sessionId) + , PackIdx(packIdx) + { + AFL_VERIFY(!!Task); + } + + 
TTxType GetTxType() const override { return NColumnShard::TXTYPE_DATA_SHARING_APPLY_LINKS_MODIFICATION; } +}; + + +} diff --git a/ydb/core/tx/columnshard/data_sharing/modification/transactions/ya.make b/ydb/core/tx/columnshard/data_sharing/modification/transactions/ya.make new file mode 100644 index 000000000000..39afa046110c --- /dev/null +++ b/ydb/core/tx/columnshard/data_sharing/modification/transactions/ya.make @@ -0,0 +1,17 @@ +LIBRARY() + +SRCS( + tx_change_blobs_owning.cpp +) + +PEERDIR( + ydb/core/tablet_flat + ydb/core/tx/tiering + ydb/services/metadata/abstract + ydb/core/tx/columnshard/data_sharing/protos + ydb/core/tx/columnshard/blobs_action/protos + ydb/core/base + ydb/core/tx/tiering +) + +END() diff --git a/ydb/core/tx/columnshard/data_sharing/modification/ya.make b/ydb/core/tx/columnshard/data_sharing/modification/ya.make new file mode 100644 index 000000000000..81c688ef432e --- /dev/null +++ b/ydb/core/tx/columnshard/data_sharing/modification/ya.make @@ -0,0 +1,9 @@ +LIBRARY() + +PEERDIR( + ydb/core/tx/columnshard/data_sharing/modification/tasks + ydb/core/tx/columnshard/data_sharing/modification/transactions + ydb/core/tx/columnshard/data_sharing/modification/events +) + +END() diff --git a/ydb/core/tx/columnshard/data_sharing/protos/data.proto b/ydb/core/tx/columnshard/data_sharing/protos/data.proto new file mode 100644 index 000000000000..f59d6e7b3d7d --- /dev/null +++ b/ydb/core/tx/columnshard/data_sharing/protos/data.proto @@ -0,0 +1,41 @@ +import "ydb/core/tx/columnshard/common/protos/blob_range.proto"; +import "ydb/core/tx/columnshard/common/protos/snapshot.proto"; +import "ydb/core/protos/tx_columnshard.proto"; + +package NKikimrColumnShardDataSharingProto; + +message TColumnRecord { + optional uint32 ColumnId = 1; + optional uint32 ChunkIdx = 2; + optional NKikimrColumnShardProto.TBlobRangeLink16 BlobRange = 3; + optional NKikimrTxColumnShard.TIndexColumnMeta Meta = 4; +} + +message TIndexChunk { + optional uint32 IndexId = 1; + optional 
uint32 ChunkIdx = 2; + optional NKikimrColumnShardProto.TBlobRangeLink16 BlobRange = 3; + + message TMeta { + optional uint32 RecordsCount = 1; + optional uint32 RawBytes = 2; + } + optional TMeta Meta = 4; +} + +message TPortionInfo { + optional uint64 PathId = 1; + optional uint64 PortionId = 2; + optional NKikimrColumnShardProto.TSnapshot MinSnapshotDeprecated = 3; + optional NKikimrColumnShardProto.TSnapshot RemoveSnapshot = 4; + optional NKikimrTxColumnShard.TIndexPortionMeta Meta = 5; + repeated TColumnRecord Records = 6; + repeated TIndexChunk Indexes = 7; + repeated NKikimrColumnShardProto.TUnifiedBlobId BlobIds = 8; + optional uint64 SchemaVersion = 9; +} + +message TPathIdData { + optional uint64 PathId = 1; + repeated TPortionInfo Portions = 2; +} diff --git a/ydb/core/tx/columnshard/data_sharing/protos/events.proto b/ydb/core/tx/columnshard/data_sharing/protos/events.proto new file mode 100644 index 000000000000..7f60cea111e0 --- /dev/null +++ b/ydb/core/tx/columnshard/data_sharing/protos/events.proto @@ -0,0 +1,67 @@ +import "ydb/library/actors/protos/actors.proto"; +import "ydb/core/tx/columnshard/data_sharing/protos/links.proto"; +import "ydb/core/tx/columnshard/data_sharing/protos/data.proto"; +import "ydb/core/tx/columnshard/data_sharing/protos/sessions.proto"; +import "ydb/core/tx/columnshard/data_sharing/protos/initiator.proto"; + +package NKikimrColumnShardDataSharingProto; + +message TEvProposeFromInitiator { + optional TDestinationSession Session = 1; +} + +message TEvConfirmFromInitiator { + optional string SessionId = 1; +} + +message TEvAckFinishFromInitiator { + optional string SessionId = 1; +} + +message TEvStartToSource { + optional TSourceSession Session = 1; +} + +message TEvSendDataFromSource { + optional string SessionId = 1; + optional uint64 PackIdx = 2; + repeated TPathIdData PathIdData = 3; + optional uint64 SourceTabletId = 4; +} + +message TEvAckDataToSource { + optional uint64 PackIdx = 1; + optional string SessionId = 2; +} 
+ +message TEvFinishedFromSource { + optional string SessionId = 1; + optional uint64 SourceTabletId = 2; +} + +message TEvAckFinishToSource { + optional string SessionId = 1; +} + +message TEvCheckStatusFromInitiator { + optional string SessionId = 1; +} + +message TEvCheckStatusResult { + optional string ClassName = 1; + optional string SessionId = 2; + optional TInitiator.TStatus Status = 3; +} + +message TEvApplyLinksModification { + optional uint64 InitiatorTabletId = 1; + optional string SessionId = 2; + optional uint64 PackIdx = 3; + optional TTaskForTablet Task = 4; +} + +message TEvApplyLinksModificationFinished { + optional uint64 ModifiedTabletId = 1; + optional string SessionId = 2; + optional uint64 PackIdx = 3; +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/data_sharing/protos/initiator.proto b/ydb/core/tx/columnshard/data_sharing/protos/initiator.proto new file mode 100644 index 000000000000..8f84e125473c --- /dev/null +++ b/ydb/core/tx/columnshard/data_sharing/protos/initiator.proto @@ -0,0 +1,38 @@ +package NKikimrColumnShardDataSharingProto; + +message TInitiator { + message TController { + optional string ClassName = 1; + + message TTest { + } + + oneof Implementation { + TTest Test = 40; + } + } + + message TStatus { + message TInProgress { + optional uint32 PortionsCount = 1; + optional uint32 PortionsReady = 2; + } + + message TStartFailed { + optional string ErrorMessage = 1; + } + + message TNotFound { + optional string ErrorMessage = 1; + } + + optional string ClassName = 1; + optional string SessionId = 2; + + oneof Implementation { + TInProgress InProgress = 40; + TStartFailed StartFailed = 41; + TNotFound NotFound = 42; + } + } +} diff --git a/ydb/core/tx/columnshard/data_sharing/protos/links.proto b/ydb/core/tx/columnshard/data_sharing/protos/links.proto new file mode 100644 index 000000000000..26dee494fbc0 --- /dev/null +++ b/ydb/core/tx/columnshard/data_sharing/protos/links.proto @@ -0,0 +1,23 @@ +import 
"ydb/core/tx/columnshard/blobs_action/protos/blobs.proto"; + +package NKikimrColumnShardDataSharingProto; + +message TBlobOwnerRemap { + optional string BlobId = 1; + optional uint64 From = 2; + optional uint64 To = 3; +} + +message TStorageTabletTask { + optional uint64 TabletId = 1; + optional string StorageId = 2; + repeated TBlobOwnerRemap RemapOwner = 3; + optional NKikimrColumnShardBlobOperationsProto.TTabletByBlob InitOwner = 4; + optional NKikimrColumnShardBlobOperationsProto.TTabletsByBlob AddSharingLinks= 5; + optional NKikimrColumnShardBlobOperationsProto.TTabletsByBlob RemoveSharingLinks = 6; +} + +message TTaskForTablet { + optional uint64 TabletId = 1; + repeated TStorageTabletTask TasksByStorage = 2; +} diff --git a/ydb/core/tx/columnshard/data_sharing/protos/sessions.proto b/ydb/core/tx/columnshard/data_sharing/protos/sessions.proto new file mode 100644 index 000000000000..9ae73bf26138 --- /dev/null +++ b/ydb/core/tx/columnshard/data_sharing/protos/sessions.proto @@ -0,0 +1,59 @@ +package NKikimrColumnShardDataSharingProto; + +import "ydb/core/tx/columnshard/common/protos/snapshot.proto"; +import "ydb/core/tx/columnshard/data_sharing/protos/initiator.proto"; + +message TDestinationRemapIds { + optional uint64 SourcePathId = 1; + optional uint64 DestPathId = 2; +} + +message TTransferContext { + optional uint64 DestinationTabletId = 1; + repeated uint64 SourceTabletIds = 2; + optional bool Moving = 3[default = false]; + optional NKikimrColumnShardProto.TSnapshot SnapshotBarrier = 4; +} + +message TDestinationSession { + optional string SessionId = 1; + repeated TDestinationRemapIds PathIds = 3; + optional TInitiator.TController InitiatorController = 4; + optional TTransferContext TransferContext = 5; + + message TSourceCursor { + optional uint64 TabletId = 1; + optional uint64 PackIdx = 2; + optional bool Finished = 3; + } + message TFullCursor { + repeated TSourceCursor SourceCursors = 1; + optional bool ConfirmedFlag = 2 [default = false]; + } +} + 
+message TSourceSession { + optional string SessionId = 1; + repeated uint64 PathIds = 3; + optional uint64 DestinationTabletId = 4; + optional TTransferContext TransferContext = 5; + + message TCursorDynamic { + optional uint64 StartPathId = 1; + optional uint64 StartPortionId = 2; + optional uint64 NextPathId = 3; + optional uint64 NextPortionId = 4; + optional uint32 PackIdx = 5; + optional uint32 AckReceivedForPackIdx = 6[default = 0]; + repeated uint64 LinksModifiedTablets = 7; + } + + message TPathPortionsHash { + optional uint64 PathId = 1; + optional string Hash = 2; + } + + message TCursorStatic { + repeated TPathPortionsHash PathHashes = 7; + } +} diff --git a/ydb/core/tx/columnshard/data_sharing/protos/transfer.proto b/ydb/core/tx/columnshard/data_sharing/protos/transfer.proto new file mode 100644 index 000000000000..8d40ba1abfc1 --- /dev/null +++ b/ydb/core/tx/columnshard/data_sharing/protos/transfer.proto @@ -0,0 +1,19 @@ +import "ydb/library/actors/protos/actors.proto"; + +package NKikimrColumnShardDataSharingProto; + +message TEvSendDataFromSource { + optional NActorsProto.TActorId SourceActorId = 1; + optional string SharingId = 2; + repeated TPathIdData DataByPathId = 3; +} + +message TEvAckDataToSource { + optional NActorsProto.TActorId DestActorId = 1; + optional string SharingId = 2; +} + +message TEvFinishedFromSource { + optional NActorsProto.TActorId SourceActorId = 1; + optional string SharingId = 2; +} diff --git a/ydb/core/tx/columnshard/data_sharing/protos/ya.make b/ydb/core/tx/columnshard/data_sharing/protos/ya.make new file mode 100644 index 000000000000..f500be540e25 --- /dev/null +++ b/ydb/core/tx/columnshard/data_sharing/protos/ya.make @@ -0,0 +1,19 @@ +PROTO_LIBRARY() + +SRCS( + data.proto + events.proto + sessions.proto + initiator.proto + links.proto +) + +PEERDIR( + ydb/core/tx/columnshard/common/protos + ydb/library/actors/protos + ydb/core/tx/columnshard/blobs_action/protos + ydb/core/protos + +) + +END() diff --git 
a/ydb/core/tx/columnshard/data_sharing/source/events/control.cpp b/ydb/core/tx/columnshard/data_sharing/source/events/control.cpp new file mode 100644 index 000000000000..d8decd48b8fd --- /dev/null +++ b/ydb/core/tx/columnshard/data_sharing/source/events/control.cpp @@ -0,0 +1,10 @@ +#include "control.h" +#include + +namespace NKikimr::NOlap::NDataSharing::NEvents { + +TEvStartToSource::TEvStartToSource(const TSourceSession& session) { + *Record.MutableSession() = session.SerializeDataToProto(); +} + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/data_sharing/source/events/control.h b/ydb/core/tx/columnshard/data_sharing/source/events/control.h new file mode 100644 index 000000000000..73438ac41ed8 --- /dev/null +++ b/ydb/core/tx/columnshard/data_sharing/source/events/control.h @@ -0,0 +1,18 @@ +#pragma once +#include +#include +#include + +namespace NKikimr::NOlap::NDataSharing { +class TSourceSession; +} + +namespace NKikimr::NOlap::NDataSharing::NEvents { + +struct TEvStartToSource: public NActors::TEventPB { + TEvStartToSource() = default; + + TEvStartToSource(const TSourceSession& session); +}; + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/data_sharing/source/events/transfer.cpp b/ydb/core/tx/columnshard/data_sharing/source/events/transfer.cpp new file mode 100644 index 000000000000..8e3f7cc3864a --- /dev/null +++ b/ydb/core/tx/columnshard/data_sharing/source/events/transfer.cpp @@ -0,0 +1,5 @@ +#include "transfer.h" + +namespace NKikimr::NOlap::NDataSharing::NEvents { + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/data_sharing/source/events/transfer.h b/ydb/core/tx/columnshard/data_sharing/source/events/transfer.h new file mode 100644 index 000000000000..e10dc1e43ada --- /dev/null +++ b/ydb/core/tx/columnshard/data_sharing/source/events/transfer.h @@ -0,0 +1,25 @@ +#pragma once +#include +#include +#include + +namespace NKikimr::NOlap::NDataSharing::NEvents { + +struct TEvAckDataToSource: 
public NActors::TEventPB { + TEvAckDataToSource() = default; + + TEvAckDataToSource(const TString& sessionId, const ui32 packIdx) { + Record.SetSessionId(sessionId); + Record.SetPackIdx(packIdx); + } +}; + +struct TEvAckFinishToSource: public NActors::TEventPB { + TEvAckFinishToSource() = default; + + TEvAckFinishToSource(const TString& sessionId) { + Record.SetSessionId(sessionId); + } +}; + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/data_sharing/source/events/ya.make b/ydb/core/tx/columnshard/data_sharing/source/events/ya.make new file mode 100644 index 000000000000..d9f1fb7017de --- /dev/null +++ b/ydb/core/tx/columnshard/data_sharing/source/events/ya.make @@ -0,0 +1,12 @@ +LIBRARY() + +SRCS( + transfer.cpp + control.cpp +) + +PEERDIR( + ydb/core/tx/columnshard/data_sharing/source/session +) + +END() diff --git a/ydb/core/tx/columnshard/data_sharing/source/session/cursor.cpp b/ydb/core/tx/columnshard/data_sharing/source/session/cursor.cpp new file mode 100644 index 000000000000..1ca8fac2ace7 --- /dev/null +++ b/ydb/core/tx/columnshard/data_sharing/source/session/cursor.cpp @@ -0,0 +1,181 @@ +#include "source.h" +#include +#include +#include + +namespace NKikimr::NOlap::NDataSharing { + +void TSourceCursor::BuildSelection(const std::shared_ptr& sharedBlobsManager, const TVersionedIndex& index) { + THashMap result; + auto itCurrentPath = PortionsForSend.find(StartPathId); + AFL_VERIFY(itCurrentPath != PortionsForSend.end()); + auto itPortion = itCurrentPath->second.find(StartPortionId); + AFL_VERIFY(itPortion != itCurrentPath->second.end()); + ui32 count = 0; + ui32 chunksCount = 0; + bool selectMore = true; + for (; itCurrentPath != PortionsForSend.end() && selectMore; ++itCurrentPath) { + std::vector portions; + for (; itPortion != itCurrentPath->second.end(); ++itPortion) { + selectMore = (count < 10000 && chunksCount < 1000000); + if (!selectMore) { + NextPathId = itCurrentPath->first; + NextPortionId = itPortion->first; + } else { + 
portions.emplace_back(*itPortion->second); + chunksCount += portions.back().GetRecords().size(); + chunksCount += portions.back().GetIndexes().size(); + ++count; + } + } + if (portions.size()) { + NEvents::TPathIdData pathIdDataCurrent(itCurrentPath->first, portions); + result.emplace(itCurrentPath->first, pathIdDataCurrent); + } + } + if (selectMore) { + AFL_VERIFY(!NextPathId); + AFL_VERIFY(!NextPortionId); + } + + THashMap tabletTasksResult; + + for (auto&& i : result) { + THashMap tabletTasks = i.second.BuildLinkTabletTasks(sharedBlobsManager, SelfTabletId, TransferContext, index); + for (auto&& t : tabletTasks) { + auto it = tabletTasksResult.find(t.first); + if (it == tabletTasksResult.end()) { + tabletTasksResult.emplace(t.first, std::move(t.second)); + } else { + it->second.Merge(t.second); + } + } + } + + std::swap(Links, tabletTasksResult); + std::swap(Selected, result); +} + +bool TSourceCursor::Next(const std::shared_ptr& sharedBlobsManager, const TVersionedIndex& index) { + PreviousSelected = std::move(Selected); + LinksModifiedTablets.clear(); + Selected.clear(); + if (!NextPathId) { + AFL_VERIFY(!NextPortionId); + return false; + } else { + AFL_VERIFY(NextPortionId); + } + StartPathId = *NextPathId; + StartPortionId = *NextPortionId; + NextPathId = {}; + NextPortionId = {}; + ++PackIdx; + BuildSelection(sharedBlobsManager, index); + AFL_VERIFY(IsValid()); + return true; +} + +NKikimrColumnShardDataSharingProto::TSourceSession::TCursorDynamic TSourceCursor::SerializeDynamicToProto() const { + NKikimrColumnShardDataSharingProto::TSourceSession::TCursorDynamic result; + result.SetStartPathId(StartPathId); + result.SetStartPortionId(StartPortionId); + if (NextPathId) { + result.SetNextPathId(*NextPathId); + } + if (NextPortionId) { + result.SetNextPortionId(*NextPortionId); + } + result.SetPackIdx(PackIdx); + result.SetAckReceivedForPackIdx(AckReceivedForPackIdx); + for (auto&& t : LinksModifiedTablets) { + result.AddLinksModifiedTablets((ui64)t); + } + 
return result; +} + +NKikimrColumnShardDataSharingProto::TSourceSession::TCursorStatic TSourceCursor::SerializeStaticToProto() const { + NKikimrColumnShardDataSharingProto::TSourceSession::TCursorStatic result; + for (auto&& i : PathPortionHashes) { + auto* pathHash = result.AddPathHashes(); + pathHash->SetPathId(i.first); + pathHash->SetHash(i.second); + } + return result; +} + +NKikimr::TConclusionStatus TSourceCursor::DeserializeFromProto(const NKikimrColumnShardDataSharingProto::TSourceSession::TCursorDynamic& proto, + const NKikimrColumnShardDataSharingProto::TSourceSession::TCursorStatic& protoStatic) { + StartPathId = proto.GetStartPathId(); + StartPortionId = proto.GetStartPortionId(); + PackIdx = proto.GetPackIdx(); + if (!PackIdx) { + return TConclusionStatus::Fail("Incorrect proto cursor PackIdx value: " + proto.DebugString()); + } + if (proto.HasNextPathId()) { + AFL_VERIFY(proto.GetNextPathId() == *NextPathId)("next_local", *NextPathId)("proto", proto.GetNextPathId()); + } + if (proto.HasNextPortionId()) { + AFL_VERIFY(proto.GetNextPortionId() == *NextPortionId)("next_local", *NextPortionId)("proto", proto.GetNextPortionId()); + } + if (proto.HasAckReceivedForPackIdx()) { + AckReceivedForPackIdx = proto.GetAckReceivedForPackIdx(); + } else { + AckReceivedForPackIdx = 0; + } + for (auto&& i : proto.GetLinksModifiedTablets()) { + LinksModifiedTablets.emplace((TTabletId)i); + } + for (auto&& i : protoStatic.GetPathHashes()) { + PathPortionHashes.emplace(i.GetPathId(), i.GetHash()); + } + AFL_VERIFY(PathPortionHashes.size()); + StaticSaved = true; + return TConclusionStatus::Success(); +} + +TSourceCursor::TSourceCursor(const TTabletId selfTabletId, const std::set& pathIds, const TTransferContext transferContext) + : SelfTabletId(selfTabletId) + , TransferContext(transferContext) + , PathIds(pathIds) +{ +} + +bool TSourceCursor::Start(const std::shared_ptr& sharedBlobsManager, const THashMap>>& portions, const TVersionedIndex& index) { + 
AFL_VERIFY(!IsStartedFlag); + std::map>> local; + std::vector> portionsLock; + NArrow::NHash::NXX64::TStreamStringHashCalcer hashCalcer(0); + for (auto&& i : portions) { + hashCalcer.Start(); + std::map> portionsMap; + for (auto&& p : i.second) { + const ui64 portionId = p->GetPortionId(); + hashCalcer.Update((ui8*)&portionId, sizeof(portionId)); + AFL_VERIFY(portionsMap.emplace(portionId, p).second); + } + auto it = PathPortionHashes.find(i.first); + const ui64 hash = hashCalcer.Finish(); + if (it == PathPortionHashes.end()) { + PathPortionHashes.emplace(i.first, ::ToString(hash)); + } else { + AFL_VERIFY(::ToString(hash) == it->second); + } + local.emplace(i.first, std::move(portionsMap)); + } + std::swap(PortionsForSend, local); + if (!StartPathId) { + AFL_VERIFY(PortionsForSend.size()); + AFL_VERIFY(PortionsForSend.begin()->second.size()); + + NextPathId = PortionsForSend.begin()->first; + NextPortionId = PortionsForSend.begin()->second.begin()->first; + AFL_VERIFY(Next(sharedBlobsManager, index)); + } else { + BuildSelection(sharedBlobsManager, index); + } + IsStartedFlag = true; + return true; +} + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/data_sharing/source/session/cursor.h b/ydb/core/tx/columnshard/data_sharing/source/session/cursor.h new file mode 100644 index 000000000000..7e527758bf0a --- /dev/null +++ b/ydb/core/tx/columnshard/data_sharing/source/session/cursor.h @@ -0,0 +1,108 @@ +#pragma once +#include +#include +#include + +namespace NKikimr::NOlap { +class TColumnEngineForLogs; +class TVersionedIndex; +} + +namespace NKikimr::NOlap::NDataSharing { + +class TSharedBlobsManager; + +class TSourceCursor { +private: + std::map>> PortionsForSend; + THashMap PreviousSelected; + THashMap Selected; + THashMap Links; + YDB_READONLY(ui64, StartPathId, 0); + YDB_READONLY(ui64, StartPortionId, 0); + YDB_READONLY(ui64, PackIdx, 0); + TTabletId SelfTabletId; + TTransferContext TransferContext; + std::optional NextPathId = 0; + 
std::optional NextPortionId = 0; + THashSet LinksModifiedTablets; + ui64 AckReceivedForPackIdx = 0; + std::set PathIds; + THashMap PathPortionHashes; + bool IsStartedFlag = false; + YDB_ACCESSOR(bool, StaticSaved, false); + void BuildSelection(const std::shared_ptr& sharedBlobsManager, const TVersionedIndex& index); +public: + bool IsAckDataReceived() const { + return AckReceivedForPackIdx == PackIdx; + } + + bool IsStarted() const { + return IsStartedFlag; + } + + TConclusionStatus AckData(const ui64 packIdxReceived) { + AFL_VERIFY(packIdxReceived <= PackIdx); + if (packIdxReceived != PackIdx) { + return TConclusionStatus::Fail("incorrect packIdx received for AckData: " + ::ToString(packIdxReceived) + " but expected: " + ::ToString(PackIdx)); + } + AckReceivedForPackIdx = packIdxReceived; + AFL_NOTICE(NKikimrServices::TX_COLUMNSHARD)("event", "SourceAckData")("pack", PackIdx)("pack_ack", AckReceivedForPackIdx)("links_ready", LinksModifiedTablets.size())("links_waiting", Links.size()); + return TConclusionStatus::Success(); + } + + TConclusionStatus AckLinks(const TTabletId tabletId, const ui64 packIdxReceived) { + if (packIdxReceived != PackIdx) { + return TConclusionStatus::Fail("incorrect packIdx received for AckLinks: " + ::ToString(packIdxReceived) + " but expected: " + ::ToString(PackIdx)); + } + AFL_VERIFY(Links.contains(tabletId)); + if (LinksModifiedTablets.emplace(tabletId).second) { + AFL_NOTICE(NKikimrServices::TX_COLUMNSHARD)("event", "SourceAckData")("pack", PackIdx)("pack_ack", AckReceivedForPackIdx)("links_ready", LinksModifiedTablets.size())("links_waiting", Links.size()); + return TConclusionStatus::Success(); + } else { + return TConclusionStatus::Fail("AckLinks repeated table"); + } + } + + bool IsReadyForNext() const { + return AckReceivedForPackIdx == PackIdx && LinksModifiedTablets.size() == Links.size(); + } + + const THashSet& GetLinksModifiedTablets() const { + return LinksModifiedTablets; + } + + void AddLinksModifiedTablet(const 
TTabletId tabletId) { + LinksModifiedTablets.emplace(tabletId); + } + + const THashMap GetPreviousSelected() const { + return PreviousSelected; + } + + const THashMap& GetSelected() const { + return Selected; + } + + const THashMap& GetLinks() const { + return Links; + } + + bool Next(const std::shared_ptr& sharedBlobsManager, const TVersionedIndex& index); + + bool IsValid() { + return Selected.size(); + } + + TSourceCursor(const TTabletId selfTabletId, const std::set& pathIds, const TTransferContext transferContext); + + bool Start(const std::shared_ptr& sharedBlobsManager, const THashMap>>& portions, const TVersionedIndex& index); + + NKikimrColumnShardDataSharingProto::TSourceSession::TCursorDynamic SerializeDynamicToProto() const; + NKikimrColumnShardDataSharingProto::TSourceSession::TCursorStatic SerializeStaticToProto() const; + + [[nodiscard]] TConclusionStatus DeserializeFromProto(const NKikimrColumnShardDataSharingProto::TSourceSession::TCursorDynamic& proto, + const NKikimrColumnShardDataSharingProto::TSourceSession::TCursorStatic& protoStatic); +}; + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/data_sharing/source/session/source.cpp b/ydb/core/tx/columnshard/data_sharing/source/session/source.cpp new file mode 100644 index 000000000000..3cf78859289f --- /dev/null +++ b/ydb/core/tx/columnshard/data_sharing/source/session/source.cpp @@ -0,0 +1,110 @@ +#include "source.h" +#include +#include +#include +#include +#include + +namespace NKikimr::NOlap::NDataSharing { + +NKikimr::TConclusionStatus TSourceSession::DeserializeFromProto(const NKikimrColumnShardDataSharingProto::TSourceSession& proto, + const std::optional& protoCursor, + const std::optional& protoCursorStatic) { + auto parseBase = TBase::DeserializeFromProto(proto); + if (!parseBase) { + return parseBase; + } + DestinationTabletId = (TTabletId)proto.GetDestinationTabletId(); + if (!(ui64)DestinationTabletId) { + return TConclusionStatus::Fail("Incorrect DestinationTabletId 
in proto."); + } + for (auto&& i : proto.GetPathIds()) { + if (!PathIds.emplace(i).second) { + return TConclusionStatus::Fail("PathIds contains duplicated values."); + } + } + if (PathIds.empty()) { + return TConclusionStatus::Fail("PathIds empty."); + } + AFL_VERIFY(PathIds.size()); + Cursor = std::make_shared(SelfTabletId, PathIds, TransferContext); + AFL_VERIFY(!!protoCursor == !!protoCursorStatic); + if (protoCursor) { + auto parsed = Cursor->DeserializeFromProto(*protoCursor, *protoCursorStatic); + if (!parsed) { + return parsed; + } + } + return TConclusionStatus::Success(); +} + +TConclusion> TSourceSession::AckFinished(NColumnShard::TColumnShard* self, const std::shared_ptr& selfPtr) { + return std::unique_ptr(new TTxFinishAckToSource(self, selfPtr, "ack_finished")); +} + +TConclusion> TSourceSession::AckData(NColumnShard::TColumnShard* self, const ui32 receivedPackIdx, const std::shared_ptr& selfPtr) { + auto ackResult = Cursor->AckData(receivedPackIdx); + if (!ackResult) { + return ackResult; + } + if (Cursor->IsReadyForNext()) { + Cursor->Next(self->GetStoragesManager()->GetSharedBlobsManager(), self->GetIndexAs().GetVersionedIndex()); + return std::unique_ptr(new TTxDataAckToSource(self, selfPtr, "ack_to_source_on_ack_data")); + } else { + return std::unique_ptr(new TTxWriteSourceCursor(self, selfPtr, "write_source_cursor_on_ack_data")); + } +} + +TConclusion> TSourceSession::AckLinks(NColumnShard::TColumnShard* self, const TTabletId tabletId, const ui32 receivedPackIdx, const std::shared_ptr& selfPtr) { + auto ackResult = Cursor->AckLinks(tabletId, receivedPackIdx); + if (!ackResult) { + return ackResult; + } + if (Cursor->IsReadyForNext()) { + Cursor->Next(self->GetStoragesManager()->GetSharedBlobsManager(), self->GetIndexAs().GetVersionedIndex()); + return std::unique_ptr(new TTxDataAckToSource(self, selfPtr, "ack_to_source_on_ack_links")); + } else { + return std::unique_ptr(new TTxWriteSourceCursor(self, selfPtr, 
"write_source_cursor_on_ack_links")); + } +} + +void TSourceSession::ActualizeDestination(const std::shared_ptr& dataLocksManager) { + AFL_VERIFY(IsStarted() || IsStarting()); + AFL_VERIFY(Cursor); + if (Cursor->IsValid()) { + if (!Cursor->IsAckDataReceived()) { + const THashMap& packPortions = Cursor->GetSelected(); + auto ev = std::make_unique(GetSessionId(), Cursor->GetPackIdx(), SelfTabletId, packPortions); + NActors::TActivationContext::AsActorContext().Send(MakePipePeNodeCacheID(false), + new TEvPipeCache::TEvForward(ev.release(), (ui64)DestinationTabletId, true), IEventHandle::FlagTrackDelivery, GetRuntimeId()); + } + { + const auto& links = Cursor->GetLinks(); + for (auto&& [tabletId, task] : links) { + if (Cursor->GetLinksModifiedTablets().contains(tabletId)) { + continue; + } + auto ev = std::make_unique(SelfTabletId, GetSessionId(), Cursor->GetPackIdx(), task); + NActors::TActivationContext::AsActorContext().Send(MakePipePeNodeCacheID(false), + new TEvPipeCache::TEvForward(ev.release(), (ui64)tabletId, true), IEventHandle::FlagTrackDelivery, GetRuntimeId()); + } + } + } else { + auto ev = std::make_unique(GetSessionId(), SelfTabletId); + NActors::TActivationContext::AsActorContext().Send(MakePipePeNodeCacheID(false), + new TEvPipeCache::TEvForward(ev.release(), (ui64)DestinationTabletId, true), IEventHandle::FlagTrackDelivery, GetRuntimeId()); + Finish(dataLocksManager); + } +} + +bool TSourceSession::DoStart(const NColumnShard::TColumnShard& shard, const THashMap>>& portions) { + AFL_VERIFY(Cursor); + if (Cursor->Start(shard.GetStoragesManager()->GetSharedBlobsManager(), portions, shard.GetIndexAs().GetVersionedIndex())) { + ActualizeDestination(shard.GetDataLocksManager()); + return true; + } else { + return false; + } +} + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/data_sharing/source/session/source.h b/ydb/core/tx/columnshard/data_sharing/source/session/source.h new file mode 100644 index 000000000000..319302fe92d8 --- 
/dev/null +++ b/ydb/core/tx/columnshard/data_sharing/source/session/source.h @@ -0,0 +1,91 @@ +#pragma once +#include "cursor.h" +#include +#include + +namespace NKikimr::NOlap::NDataSharing { + +class TSharedBlobsManager; + +class TSourceSession: public TCommonSession { +private: + using TBase = TCommonSession; + const TTabletId SelfTabletId; + std::shared_ptr Cursor; + YDB_READONLY_DEF(std::set, PathIds); + TTabletId DestinationTabletId = TTabletId(0); +protected: + virtual bool DoStart(const NColumnShard::TColumnShard& shard, const THashMap>>& portions) override; + virtual THashSet GetPathIdsForStart() const override { + THashSet result; + for (auto&& i : PathIds) { + result.emplace(i); + } + return result; + } +public: + TSourceSession(const TTabletId selfTabletId) + : TBase("source_proto") + , SelfTabletId(selfTabletId) + { + + } + + TSourceSession(const TString& sessionId, const TTransferContext& transfer, const TTabletId selfTabletId, const std::set& pathIds, const TTabletId destTabletId) + : TBase(sessionId, "source_base", transfer) + , SelfTabletId(selfTabletId) + , PathIds(pathIds) + , DestinationTabletId(destTabletId) + { + } + + TTabletId GetDestinationTabletId() const { + return DestinationTabletId; + } + + TString DebugString() const { + return TStringBuilder() << "{base=" << TBase::DebugString() << ";destination_tablet_id=" << (ui64)DestinationTabletId << ";}"; + } + + bool IsEqualTo(const TSourceSession& item) const { + return + TBase::IsEqualTo(item) && + DestinationTabletId == item.DestinationTabletId && + PathIds == item.PathIds; + } + + std::shared_ptr GetCursorVerified() const { + AFL_VERIFY(!!Cursor); + return Cursor; + } + + bool TryNextCursor(const ui32 packIdx, const std::shared_ptr& sharedBlobsManager, const TVersionedIndex& index) { + AFL_VERIFY(Cursor); + if (packIdx != Cursor->GetPackIdx()) { + return false; + } + Cursor->Next(sharedBlobsManager, index); + return true; + } + + [[nodiscard]] TConclusion> 
AckFinished(NColumnShard::TColumnShard* self, const std::shared_ptr& selfPtr); + [[nodiscard]] TConclusion> AckData(NColumnShard::TColumnShard* self, const ui32 receivedPackIdx, const std::shared_ptr& selfPtr); + [[nodiscard]] TConclusion> AckLinks(NColumnShard::TColumnShard* self, const TTabletId tabletId, const ui32 packIdx, const std::shared_ptr& selfPtr); + + void ActualizeDestination(const std::shared_ptr& dataLocksManager); + + NKikimrColumnShardDataSharingProto::TSourceSession SerializeDataToProto() const { + NKikimrColumnShardDataSharingProto::TSourceSession result; + TBase::SerializeToProto(result); + result.SetDestinationTabletId((ui64)DestinationTabletId); + for (auto&& i : PathIds) { + result.AddPathIds(i); + } + return result; + } + + [[nodiscard]] TConclusionStatus DeserializeFromProto(const NKikimrColumnShardDataSharingProto::TSourceSession& proto, + const std::optional& protoCursor, + const std::optional& protoCursorStatic); +}; +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/data_sharing/source/session/ya.make b/ydb/core/tx/columnshard/data_sharing/source/session/ya.make new file mode 100644 index 000000000000..c045cca16e17 --- /dev/null +++ b/ydb/core/tx/columnshard/data_sharing/source/session/ya.make @@ -0,0 +1,14 @@ +LIBRARY() + +SRCS( + source.cpp + cursor.cpp +) + +PEERDIR( + ydb/core/tx/columnshard/data_sharing/common/session + ydb/core/tx/columnshard/data_sharing/destination/events + ydb/core/tx/columnshard/data_sharing/source/transactions +) + +END() diff --git a/ydb/core/tx/columnshard/data_sharing/source/transactions/tx_data_ack_to_source.cpp b/ydb/core/tx/columnshard/data_sharing/source/transactions/tx_data_ack_to_source.cpp new file mode 100644 index 000000000000..146a93bc706c --- /dev/null +++ b/ydb/core/tx/columnshard/data_sharing/source/transactions/tx_data_ack_to_source.cpp @@ -0,0 +1,40 @@ +#include "tx_data_ack_to_source.h" +#include + +namespace NKikimr::NOlap::NDataSharing { + +bool 
TTxDataAckToSource::DoExecute(NTabletFlatExecutor::TTransactionContext& txc, const TActorContext& /*ctx*/) { + using namespace NColumnShard; + THashMap sharedTabletBlobIds; + { + THashMap> sharedBlobIds; + auto& index = Self->GetIndexAs().GetVersionedIndex(); + for (auto&& [_, i] : Session->GetCursorVerified()->GetPreviousSelected()) { + for (auto&& portion : i.GetPortions()) { + portion.FillBlobIdsByStorage(sharedBlobIds, index); + } + } + for (auto&& i : sharedBlobIds) { + AFL_VERIFY(sharedTabletBlobIds[i.first].Add(Session->GetDestinationTabletId(), i.second)); + sharedTabletBlobIds[i.first].Add(Self->GetStoragesManager()->GetSharedBlobsManager()->GetSelfTabletId(), std::move(i.second)); + } + Self->GetStoragesManager()->GetSharedBlobsManager()->WriteSharedBlobsDB(txc, sharedTabletBlobIds); + } + + NIceDb::TNiceDb db(txc.DB); + db.Table().Key(Session->GetSessionId()) + .Update(NIceDb::TUpdate(Session->GetCursorVerified()->SerializeDynamicToProto().SerializeAsString())); + if (!Session->GetCursorVerified()->GetStaticSaved()) { + db.Table().Key(Session->GetSessionId()) + .Update(NIceDb::TUpdate(Session->GetCursorVerified()->SerializeStaticToProto().SerializeAsString())); + Session->GetCursorVerified()->SetStaticSaved(true); + } + std::swap(SharedBlobIds, sharedTabletBlobIds); + return true; +} + +void TTxDataAckToSource::DoComplete(const TActorContext& /*ctx*/) { + Session->ActualizeDestination(Self->GetDataLocksManager()); +} + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/data_sharing/source/transactions/tx_data_ack_to_source.h b/ydb/core/tx/columnshard/data_sharing/source/transactions/tx_data_ack_to_source.h new file mode 100644 index 000000000000..a8a4756a94f8 --- /dev/null +++ b/ydb/core/tx/columnshard/data_sharing/source/transactions/tx_data_ack_to_source.h @@ -0,0 +1,28 @@ +#pragma once +#include +#include +#include +#include + +namespace NKikimr::NOlap::NDataSharing { + +class TTxDataAckToSource: public TExtendedTransactionBase { 
+private: + using TBase = TExtendedTransactionBase; + std::shared_ptr Session; + THashMap SharedBlobIds; +protected: + virtual bool DoExecute(NTabletFlatExecutor::TTransactionContext& txc, const TActorContext& ctx) override; + virtual void DoComplete(const TActorContext& ctx) override; +public: + TTxDataAckToSource(NColumnShard::TColumnShard* self, const std::shared_ptr& session, const TString& info) + : TBase(self, info) + , Session(session) + { + } + + TTxType GetTxType() const override { return NColumnShard::TXTYPE_DATA_SHARING_DATA_ACK_TO_SOURCE; } +}; + + +} diff --git a/ydb/core/tx/columnshard/data_sharing/source/transactions/tx_finish_ack_to_source.cpp b/ydb/core/tx/columnshard/data_sharing/source/transactions/tx_finish_ack_to_source.cpp new file mode 100644 index 000000000000..913cc7f0ac5c --- /dev/null +++ b/ydb/core/tx/columnshard/data_sharing/source/transactions/tx_finish_ack_to_source.cpp @@ -0,0 +1,15 @@ +#include "tx_finish_ack_to_source.h" + +namespace NKikimr::NOlap::NDataSharing { +bool TTxFinishAckToSource::DoExecute(NTabletFlatExecutor::TTransactionContext& txc, const TActorContext& /*ctx*/) { + using namespace NKikimr::NColumnShard; + NIceDb::TNiceDb db(txc.DB); + db.Table().Key(Session->GetSessionId()).Delete(); + return true; +} + +void TTxFinishAckToSource::DoComplete(const TActorContext& /*ctx*/) { + Self->SharingSessionsManager->RemoveSourceSession(Session->GetSessionId()); +} + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/data_sharing/source/transactions/tx_finish_ack_to_source.h b/ydb/core/tx/columnshard/data_sharing/source/transactions/tx_finish_ack_to_source.h new file mode 100644 index 000000000000..3c03f5f7e9c7 --- /dev/null +++ b/ydb/core/tx/columnshard/data_sharing/source/transactions/tx_finish_ack_to_source.h @@ -0,0 +1,27 @@ +#pragma once +#include +#include +#include + +namespace NKikimr::NOlap::NDataSharing { + +class TTxFinishAckToSource: public TExtendedTransactionBase { +private: + using TBase = 
TExtendedTransactionBase; + std::shared_ptr Session; +protected: + virtual bool DoExecute(NTabletFlatExecutor::TTransactionContext& txc, const TActorContext& ctx) override; + virtual void DoComplete(const TActorContext& /*ctx*/) override; +public: + TTxFinishAckToSource(NColumnShard::TColumnShard* self, const std::shared_ptr& session, const TString& info) + : TBase(self, info) + , Session(session) + { + AFL_VERIFY(!Session->GetCursorVerified()->IsValid()); + } + + TTxType GetTxType() const override { return NColumnShard::TXTYPE_DATA_SHARING_FINISH_ACK_TO_SOURCE; } +}; + + +} diff --git a/ydb/core/tx/columnshard/data_sharing/source/transactions/tx_start_to_source.cpp b/ydb/core/tx/columnshard/data_sharing/source/transactions/tx_start_to_source.cpp new file mode 100644 index 000000000000..687b2e3661e0 --- /dev/null +++ b/ydb/core/tx/columnshard/data_sharing/source/transactions/tx_start_to_source.cpp @@ -0,0 +1,20 @@ +#include "tx_start_to_source.h" +#include + +namespace NKikimr::NOlap::NDataSharing { + +bool TTxStartToSource::DoExecute(NTabletFlatExecutor::TTransactionContext& txc, const TActorContext& /*ctx*/) { + using namespace NColumnShard; + NIceDb::TNiceDb db(txc.DB); + db.Table().Key(Session->GetSessionId()) + .Update(NIceDb::TUpdate(Session->SerializeDataToProto().SerializeAsString())); + return true; +} + +void TTxStartToSource::DoComplete(const TActorContext& /*ctx*/) { + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("info", "TTxStartToSource::Complete"); + AFL_VERIFY(Sessions->emplace(Session->GetSessionId(), Session).second); + Session->Start(*Self); +} + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/data_sharing/source/transactions/tx_start_to_source.h b/ydb/core/tx/columnshard/data_sharing/source/transactions/tx_start_to_source.h new file mode 100644 index 000000000000..6506f269a75c --- /dev/null +++ b/ydb/core/tx/columnshard/data_sharing/source/transactions/tx_start_to_source.h @@ -0,0 +1,28 @@ +#pragma once +#include +#include 
+#include + +namespace NKikimr::NOlap::NDataSharing { + +class TTxStartToSource: public TExtendedTransactionBase { +private: + using TBase = TExtendedTransactionBase; + std::shared_ptr Session; + THashMap>* Sessions; +protected: + virtual bool DoExecute(NTabletFlatExecutor::TTransactionContext& txc, const TActorContext& ctx) override; + virtual void DoComplete(const TActorContext& ctx) override; +public: + TTxStartToSource(NColumnShard::TColumnShard* self, const std::shared_ptr& session, THashMap>& sessions, const TString& info) + : TBase(self, info) + , Session(session) + , Sessions(&sessions) + { + } + + TTxType GetTxType() const override { return NColumnShard::TXTYPE_DATA_SHARING_START_TO_SOURCE; } +}; + + +} diff --git a/ydb/core/tx/columnshard/data_sharing/source/transactions/tx_write_source_cursor.cpp b/ydb/core/tx/columnshard/data_sharing/source/transactions/tx_write_source_cursor.cpp new file mode 100644 index 000000000000..4af96622de2b --- /dev/null +++ b/ydb/core/tx/columnshard/data_sharing/source/transactions/tx_write_source_cursor.cpp @@ -0,0 +1,17 @@ +#include "tx_write_source_cursor.h" +#include + +namespace NKikimr::NOlap::NDataSharing { + +bool TTxWriteSourceCursor::DoExecute(NTabletFlatExecutor::TTransactionContext& txc, const TActorContext& /*ctx*/) { + using namespace NColumnShard; + NIceDb::TNiceDb db(txc.DB); + db.Table().Key(Session->GetSessionId()) + .Update(NIceDb::TUpdate(Session->GetCursorVerified()->SerializeDynamicToProto().SerializeAsString())); + return true; +} + +void TTxWriteSourceCursor::DoComplete(const TActorContext& /*ctx*/) { +} + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/data_sharing/source/transactions/tx_write_source_cursor.h b/ydb/core/tx/columnshard/data_sharing/source/transactions/tx_write_source_cursor.h new file mode 100644 index 000000000000..315e6038ed41 --- /dev/null +++ b/ydb/core/tx/columnshard/data_sharing/source/transactions/tx_write_source_cursor.h @@ -0,0 +1,27 @@ +#pragma once 
+#include +#include +#include +#include + +namespace NKikimr::NOlap::NDataSharing { + +class TTxWriteSourceCursor: public TExtendedTransactionBase { +private: + using TBase = TExtendedTransactionBase; + std::shared_ptr Session; +protected: + virtual bool DoExecute(NTabletFlatExecutor::TTransactionContext& txc, const TActorContext& ctx) override; + virtual void DoComplete(const TActorContext& ctx) override; +public: + TTxWriteSourceCursor(NColumnShard::TColumnShard* self, const std::shared_ptr& session, const TString& info) + : TBase(self, info) + , Session(session) + { + } + + TTxType GetTxType() const override { return NColumnShard::TXTYPE_DATA_SHARING_WRITE_SOURCE_CURSOR; } +}; + + +} diff --git a/ydb/core/tx/columnshard/data_sharing/source/transactions/ya.make b/ydb/core/tx/columnshard/data_sharing/source/transactions/ya.make new file mode 100644 index 000000000000..90269b952ed4 --- /dev/null +++ b/ydb/core/tx/columnshard/data_sharing/source/transactions/ya.make @@ -0,0 +1,14 @@ +LIBRARY() + +SRCS( + tx_start_to_source.cpp + tx_data_ack_to_source.cpp + tx_finish_ack_to_source.cpp + tx_write_source_cursor.cpp +) + +PEERDIR( + ydb/core/tx/columnshard/data_sharing/common/transactions +) + +END() diff --git a/ydb/core/tx/columnshard/data_sharing/source/ya.make b/ydb/core/tx/columnshard/data_sharing/source/ya.make new file mode 100644 index 000000000000..ed17858d4057 --- /dev/null +++ b/ydb/core/tx/columnshard/data_sharing/source/ya.make @@ -0,0 +1,9 @@ +LIBRARY() + +PEERDIR( + ydb/core/tx/columnshard/data_sharing/source/session + ydb/core/tx/columnshard/data_sharing/source/transactions + ydb/core/tx/columnshard/data_sharing/source/events +) + +END() diff --git a/ydb/core/tx/columnshard/data_sharing/ya.make b/ydb/core/tx/columnshard/data_sharing/ya.make new file mode 100644 index 000000000000..f3f443c03540 --- /dev/null +++ b/ydb/core/tx/columnshard/data_sharing/ya.make @@ -0,0 +1,13 @@ +LIBRARY() + +PEERDIR( + ydb/core/tx/columnshard/data_sharing/common + 
ydb/core/tx/columnshard/data_sharing/destination + ydb/core/tx/columnshard/data_sharing/source + ydb/core/tx/columnshard/data_sharing/initiator + ydb/core/tx/columnshard/data_sharing/manager + ydb/core/tx/columnshard/data_sharing/protos + ydb/core/tx/columnshard/data_sharing/modification +) + +END() diff --git a/ydb/core/tx/columnshard/defs.h b/ydb/core/tx/columnshard/defs.h index f3d5c90bc181..76d1cc3a2bb0 100644 --- a/ydb/core/tx/columnshard/defs.h +++ b/ydb/core/tx/columnshard/defs.h @@ -1,4 +1,5 @@ #pragma once +#include "common/blob.h" #include #include #include diff --git a/ydb/core/tx/columnshard/engines/changes/abstract/abstract.cpp b/ydb/core/tx/columnshard/engines/changes/abstract/abstract.cpp index 8d3c4ae87610..1f6f50664bb8 100644 --- a/ydb/core/tx/columnshard/engines/changes/abstract/abstract.cpp +++ b/ydb/core/tx/columnshard/engines/changes/abstract/abstract.cpp @@ -2,6 +2,7 @@ #include #include #include +#include #include namespace NKikimr::NOlap { @@ -17,13 +18,7 @@ TString TColumnEngineChanges::DebugString() const { TConclusionStatus TColumnEngineChanges::ConstructBlobs(TConstructionContext& context) noexcept { Y_ABORT_UNLESS(Stage == EStage::Started); - { - ui64 readBytes = 0; - for (auto&& i : Blobs) { - readBytes += i.first.Size; - } - context.Counters.CompactionInputSize(readBytes); - } + context.Counters.CompactionInputSize(Blobs.GetTotalBlobsSize()); const TMonotonic start = TMonotonic::Now(); TConclusionStatus result = DoConstructBlobs(context); if (result.Ok()) { @@ -35,31 +30,24 @@ TConclusionStatus TColumnEngineChanges::ConstructBlobs(TConstructionContext& con return result; } -bool TColumnEngineChanges::ApplyChanges(TColumnEngineForLogs& self, TApplyChangesContext& context) { - Y_ABORT_UNLESS(Stage == EStage::Compiled); - Y_ABORT_UNLESS(DoApplyChanges(self, context)); - Stage = EStage::Applied; - return true; -} - -void TColumnEngineChanges::WriteIndex(NColumnShard::TColumnShard& self, TWriteIndexContext& context) { +void 
TColumnEngineChanges::WriteIndexOnExecute(NColumnShard::TColumnShard* self, TWriteIndexContext& context) { Y_ABORT_UNLESS(Stage != EStage::Aborted); - if ((ui32)Stage >= (ui32)EStage::Written) { - return; - } - Y_ABORT_UNLESS(Stage == EStage::Applied); + Y_ABORT_UNLESS(Stage <= EStage::Written); + Y_ABORT_UNLESS(Stage >= EStage::Compiled); - DoWriteIndex(self, context); + DoWriteIndexOnExecute(self, context); Stage = EStage::Written; } -void TColumnEngineChanges::WriteIndexComplete(NColumnShard::TColumnShard& self, TWriteIndexCompleteContext& context) { - Y_ABORT_UNLESS(Stage == EStage::Written); +void TColumnEngineChanges::WriteIndexOnComplete(NColumnShard::TColumnShard* self, TWriteIndexCompleteContext& context) { + Y_ABORT_UNLESS(Stage == EStage::Written || !self); Stage = EStage::Finished; AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "WriteIndexComplete")("type", TypeString())("success", context.FinishedSuccessfully); - DoWriteIndexComplete(self, context); - DoOnFinish(self, context); - self.IncCounter(GetCounterIndex(context.FinishedSuccessfully)); + DoWriteIndexOnComplete(self, context); + if (self) { + OnFinish(*self, context); + self->IncCounter(GetCounterIndex(context.FinishedSuccessfully)); + } } @@ -76,17 +64,20 @@ void TColumnEngineChanges::Compile(TFinalizationContext& context) noexcept { } TColumnEngineChanges::~TColumnEngineChanges() { - Y_DEBUG_ABORT_UNLESS(!NActors::TlsActivationContext || Stage == EStage::Created || Stage == EStage::Finished || Stage == EStage::Aborted); +// AFL_VERIFY_DEBUG(!NActors::TlsActivationContext || Stage == EStage::Created || Stage == EStage::Finished || Stage == EStage::Aborted)("stage", Stage); } void TColumnEngineChanges::Abort(NColumnShard::TColumnShard& self, TChangesFinishContext& context) { - Y_ABORT_UNLESS(Stage != EStage::Finished && Stage != EStage::Created && Stage != EStage::Aborted); + AFL_VERIFY(Stage != EStage::Finished && Stage != EStage::Created && Stage != EStage::Aborted)("stage", 
Stage)("reason", context.ErrorMessage); Stage = EStage::Aborted; - DoOnFinish(self, context); + OnFinish(self, context); } void TColumnEngineChanges::Start(NColumnShard::TColumnShard& self) { + AFL_VERIFY(!LockGuard); + LockGuard = self.DataLocksManager->RegisterLock(BuildDataLock()); Y_ABORT_UNLESS(Stage == EStage::Created); + NYDBTest::TControllers::GetColumnShardController()->OnWriteIndexStart(self.TabletID(), *this); DoStart(self); Stage = EStage::Started; if (!NeedConstruction()) { @@ -102,16 +93,23 @@ void TColumnEngineChanges::StartEmergency() { } } -void TColumnEngineChanges::AbortEmergency() { - AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "AbortEmergency"); +void TColumnEngineChanges::AbortEmergency(const TString& reason) { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "AbortEmergency")("reason", reason); Stage = EStage::Aborted; OnAbortEmergency(); } -TWriteIndexContext::TWriteIndexContext(NTabletFlatExecutor::TTransactionContext& txc, IDbWrapper& dbWrapper) - : Txc(txc) - , BlobManagerDb(std::make_shared(txc.DB)) +void TColumnEngineChanges::OnFinish(NColumnShard::TColumnShard& self, TChangesFinishContext& context) { + if (!!LockGuard) { + LockGuard->Release(*self.DataLocksManager); + } + DoOnFinish(self, context); +} + +TWriteIndexContext::TWriteIndexContext(NTable::TDatabase* db, IDbWrapper& dbWrapper, TColumnEngineForLogs& engineLogs) + : DB(db) , DBWrapper(dbWrapper) + , EngineLogs(engineLogs) { } diff --git a/ydb/core/tx/columnshard/engines/changes/abstract/abstract.h b/ydb/core/tx/columnshard/engines/changes/abstract/abstract.h index dc5b3b2eb0f4..452e6e331292 100644 --- a/ydb/core/tx/columnshard/engines/changes/abstract/abstract.h +++ b/ydb/core/tx/columnshard/engines/changes/abstract/abstract.h @@ -1,12 +1,18 @@ #pragma once #include "settings.h" +#include +#include +#include #include #include +#include +#include +#include +#include +#include #include #include #include -#include -#include #include #include @@ -24,7 +30,6 @@ class 
TTransactionContext; namespace NKikimr::NColumnShard { class TBlobManagerDb; class TColumnShard; -class TBackgroundActivity; } namespace NKikimr::NOlap { @@ -32,17 +37,69 @@ class TColumnEngineForLogs; class TVersionedIndex; class TPortionInfoWithBlobs; -struct TPortionEvictionFeatures { - const TString TargetTierName; - const ui64 PathId; // portion path id for cold-storage-key construct - bool DataChanges = true; - const std::shared_ptr StorageOperator; - - TPortionEvictionFeatures(const TString& targetTierName, const ui64 pathId, const std::shared_ptr& storageOperator) - : TargetTierName(targetTierName) - , PathId(pathId) - , StorageOperator(storageOperator) - {} +class TPortionEvictionFeatures { +private: + YDB_READONLY_DEF(std::shared_ptr, CurrentScheme); + YDB_READONLY_DEF(std::shared_ptr, TargetScheme); + std::optional TargetTierName; + const TString CurrentTierName; + std::optional RWAddress; +public: + TPortionEvictionFeatures(const std::shared_ptr& currentScheme, const std::shared_ptr& targetScheme, const TString& currentTierName) + : CurrentScheme(currentScheme) + , TargetScheme(targetScheme) + , CurrentTierName(currentTierName) + { + AFL_VERIFY(CurrentTierName); + } + + const TString& GetTargetTierName() const { + AFL_VERIFY(TargetTierName); + return *TargetTierName; + } + + void SetTargetTierName(const TString& value) { + AFL_VERIFY(!TargetTierName); + TargetTierName = value; + } + + void OnSkipPortionWithProcessMemory(const NColumnShard::TEngineLogsCounters& counters, const TDuration dWait) const { + if (TargetTierName == NTiering::NCommon::DeleteTierName) { + counters.OnSkipDeleteWithProcessMemory(dWait); + } else { + counters.OnSkipEvictionWithProcessMemory(dWait); + } + } + + void OnSkipPortionWithTxLimit(const NColumnShard::TEngineLogsCounters& counters, const TDuration dWait) const { + if (TargetTierName == NTiering::NCommon::DeleteTierName) { + counters.OnSkipDeleteWithTxLimit(dWait); + } else { + counters.OnSkipEvictionWithTxLimit(dWait); + } + 
} + + NActualizer::TRWAddress GetRWAddress() { + if (!RWAddress) { + AFL_VERIFY(TargetTierName); + RWAddress = NActualizer::TRWAddress(CurrentScheme->GetIndexInfo().GetUsedStorageIds(CurrentTierName), TargetScheme->GetIndexInfo().GetUsedStorageIds(*TargetTierName)); + } + return *RWAddress; + } + + bool NeedRewrite() const { + if (TargetTierName == NTiering::NCommon::DeleteTierName) { + return false; + } + if (CurrentTierName != TargetTierName) { + return true; + } + if (CurrentScheme->GetVersion() != TargetScheme->GetVersion()) { + return true; + } + AFL_VERIFY(false); + return false; + } }; class TFinalizationContext: TNonCopyable { @@ -71,10 +128,10 @@ class TFinalizationContext: TNonCopyable { class TWriteIndexContext: TNonCopyable { public: - NTabletFlatExecutor::TTransactionContext& Txc; - std::shared_ptr BlobManagerDb; + NTable::TDatabase* DB; IDbWrapper& DBWrapper; - TWriteIndexContext(NTabletFlatExecutor::TTransactionContext& txc, IDbWrapper& dbWrapper); + TColumnEngineForLogs& EngineLogs; + TWriteIndexContext(NTable::TDatabase* db, IDbWrapper& dbWrapper, TColumnEngineForLogs& engineLogs); }; class TChangesFinishContext { @@ -98,38 +155,30 @@ class TWriteIndexCompleteContext: TNonCopyable, public TChangesFinishContext { const ui32 BlobsWritten; const ui64 BytesWritten; const TDuration Duration; - NColumnShard::TBackgroundActivity& TriggerActivity; + TColumnEngineForLogs& EngineLogs; TWriteIndexCompleteContext(const TActorContext& actorContext, const ui32 blobsWritten, const ui64 bytesWritten - , const TDuration d, NColumnShard::TBackgroundActivity& triggerActivity) + , const TDuration d, TColumnEngineForLogs& engineLogs) : ActorContext(actorContext) , BlobsWritten(blobsWritten) , BytesWritten(bytesWritten) , Duration(d) - , TriggerActivity(triggerActivity) + , EngineLogs(engineLogs) { } }; -class TApplyChangesContext: TNonCopyable { -public: - IDbWrapper& DB; - const TSnapshot Snapshot; - TApplyChangesContext(IDbWrapper& db, const TSnapshot& snapshot) - : 
DB(db) - , Snapshot(snapshot) { - - } -}; - class TConstructionContext: TNonCopyable { public: const TVersionedIndex& SchemaVersions; const NColumnShard::TIndexationCounters Counters; + const NOlap::TSnapshot LastCommittedTx; - TConstructionContext(const TVersionedIndex& schemaVersions, const NColumnShard::TIndexationCounters& counters) + TConstructionContext(const TVersionedIndex& schemaVersions, const NColumnShard::TIndexationCounters& counters, const NOlap::TSnapshot& lastCommittedTx) : SchemaVersions(schemaVersions) - , Counters(counters) { + , Counters(counters) + , LastCommittedTx(lastCommittedTx) + { } }; @@ -143,20 +192,19 @@ class TColumnEngineChanges { Started, Constructed, Compiled, - Applied, Written, Finished, Aborted }; private: EStage Stage = EStage::Created; + std::shared_ptr LockGuard; protected: virtual void DoDebugString(TStringOutput& out) const = 0; virtual void DoCompile(TFinalizationContext& context) = 0; - virtual void DoWriteIndex(NColumnShard::TColumnShard& self, TWriteIndexContext& context) = 0; - virtual void DoWriteIndexComplete(NColumnShard::TColumnShard& self, TWriteIndexCompleteContext& context) = 0; + virtual void DoWriteIndexOnExecute(NColumnShard::TColumnShard* self, TWriteIndexContext& context) = 0; + virtual void DoWriteIndexOnComplete(NColumnShard::TColumnShard* self, TWriteIndexCompleteContext& context) = 0; virtual void DoOnFinish(NColumnShard::TColumnShard& self, TChangesFinishContext& context) = 0; - virtual bool DoApplyChanges(TColumnEngineForLogs& self, TApplyChangesContext& context) = 0; virtual bool NeedConstruction() const { return true; } @@ -172,6 +220,11 @@ class TColumnEngineChanges { const TString TaskIdentifier = TGUID::Create().AsGuidString(); virtual ui64 DoCalcMemoryForUsage() const = 0; + virtual std::shared_ptr DoBuildDataLock() const = 0; + std::shared_ptr BuildDataLock() const { + return DoBuildDataLock(); + } + public: class IMemoryPredictor { public: @@ -179,6 +232,8 @@ class TColumnEngineChanges { 
virtual ~IMemoryPredictor() = default; }; + void OnFinish(NColumnShard::TColumnShard& self, TChangesFinishContext& context); + ui64 CalcMemoryForUsage() const { return DoCalcMemoryForUsage(); } @@ -195,7 +250,7 @@ class TColumnEngineChanges { return BlobsAction; } - TColumnEngineChanges(const std::shared_ptr& storagesManager, const TString& consumerId) + TColumnEngineChanges(const std::shared_ptr& storagesManager, const NBlobOperations::EConsumer consumerId) : BlobsAction(storagesManager, consumerId) { @@ -208,30 +263,22 @@ class TColumnEngineChanges { return Stage == EStage::Aborted; } - virtual THashSet GetTouchedPortions() const = 0; - void StartEmergency(); - void AbortEmergency(); + void AbortEmergency(const TString& reason); void Abort(NColumnShard::TColumnShard& self, TChangesFinishContext& context); void Start(NColumnShard::TColumnShard& self); - bool ApplyChanges(TColumnEngineForLogs& self, TApplyChangesContext& context); virtual ui32 GetWritePortionsCount() const = 0; virtual TPortionInfoWithBlobs* GetWritePortionInfo(const ui32 index) = 0; virtual bool NeedWritePortion(const ui32 index) const = 0; - void WriteIndex(NColumnShard::TColumnShard& self, TWriteIndexContext& context); - void WriteIndexComplete(NColumnShard::TColumnShard& self, TWriteIndexCompleteContext& context); + void WriteIndexOnExecute(NColumnShard::TColumnShard* self, TWriteIndexContext& context); + void WriteIndexOnComplete(NColumnShard::TColumnShard* self, TWriteIndexCompleteContext& context); void Compile(TFinalizationContext& context) noexcept; - void SetBlobs(THashMap&& blobs) { - Y_ABORT_UNLESS(!blobs.empty()); - Blobs = std::move(blobs); - } - - THashMap Blobs; + NBlobOperations::NRead::TCompositeReadBlobs Blobs; std::shared_ptr ResourcesGuard; std::vector> GetReadingActions() const { @@ -243,11 +290,7 @@ class TColumnEngineChanges { TString DebugString() const; ui64 TotalBlobsSize() const { - ui64 size = 0; - for (const auto& [_, blob] : Blobs) { - size += blob.size(); - } - 
return size; + return Blobs.GetTotalBlobsSize(); } }; diff --git a/ydb/core/tx/columnshard/engines/changes/abstract/settings.h b/ydb/core/tx/columnshard/engines/changes/abstract/settings.h index ec966965d52d..20dd155ba426 100644 --- a/ydb/core/tx/columnshard/engines/changes/abstract/settings.h +++ b/ydb/core/tx/columnshard/engines/changes/abstract/settings.h @@ -32,13 +32,6 @@ struct TCompactionLimits { i64 GranuleSizeForOverloadPrevent = WARNING_OVERLOAD_GRANULE_SIZE; i64 GranuleIndexedPortionsSizeLimit = WARNING_INSERTED_PORTIONS_SIZE; ui32 GranuleIndexedPortionsCountLimit = WARNING_INSERTED_PORTIONS_COUNT; - - TSplitSettings GetSplitSettings() const { - return TSplitSettings() - .SetMinBlobSize(0.5 * std::min(MAX_BLOB_SIZE, GranuleSizeForOverloadPrevent)) - .SetMaxBlobSize(std::min(MAX_BLOB_SIZE, GranuleSizeForOverloadPrevent)) - .SetMaxPortionSize(0.5 * GranuleSizeForOverloadPrevent); - } }; } diff --git a/ydb/core/tx/columnshard/engines/changes/actualization/construction/context.cpp b/ydb/core/tx/columnshard/engines/changes/actualization/construction/context.cpp new file mode 100644 index 000000000000..6b1ea617f219 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/changes/actualization/construction/context.cpp @@ -0,0 +1,73 @@ +#include "context.h" +#include +#include +#include + +namespace NKikimr::NOlap::NActualizer { + +TTieringProcessContext::TTieringProcessContext(const ui64 memoryUsageLimit, const TSaverContext& saverContext, + const std::shared_ptr& dataLocksManager, const NColumnShard::TEngineLogsCounters& counters, const std::shared_ptr& controller) + : MemoryUsageLimit(memoryUsageLimit) + , SaverContext(saverContext) + , Counters(counters) + , Controller(controller) + , DataLocksManager(dataLocksManager) + , Now(TlsActivationContext ? 
AppData()->TimeProvider->Now() : TInstant::Now()) +{ + +} + +bool TTieringProcessContext::AddPortion(const TPortionInfo& info, TPortionEvictionFeatures&& features, const std::optional dWait) { + if (!UsedPortions.emplace(info.GetAddress()).second) { + return true; + } + if (DataLocksManager->IsLocked(info)) { + return true; + } + + const auto buildNewTask = [&]() { + return TTaskConstructor(TTTLColumnEngineChanges::BuildMemoryPredictor(), std::make_shared(features.GetRWAddress(), SaverContext)); + }; + auto it = Tasks.find(features.GetRWAddress()); + if (it == Tasks.end()) { + std::vector tasks = {buildNewTask()}; + it = Tasks.emplace(features.GetRWAddress(), std::move(tasks)).first; + } + if (it->second.back().GetTxWriteVolume() + info.GetTxVolume() > TGlobalLimits::TxWriteLimitBytes && it->second.back().GetTxWriteVolume()) { + if (Controller->IsNewTaskAvailable(it->first, it->second.size())) { + it->second.emplace_back(buildNewTask()); + } else { + return false; + } + features.OnSkipPortionWithProcessMemory(Counters, *dWait); + } + if (features.NeedRewrite()) { + if (MemoryUsageLimit <= it->second.back().GetMemoryUsage()) { + if (Controller->IsNewTaskAvailable(it->first, it->second.size())) { + it->second.emplace_back(buildNewTask()); + } else { + return false; + } + features.OnSkipPortionWithTxLimit(Counters, *dWait); + } + it->second.back().MutableMemoryUsage() = it->second.back().GetMemoryPredictor()->AddPortion(info); + } + it->second.back().MutableTxWriteVolume() += info.GetTxVolume(); + if (features.GetTargetTierName() == NTiering::NCommon::DeleteTierName) { + AFL_VERIFY(dWait); + Counters.OnPortionToDrop(info.GetTotalBlobBytes(), *dWait); + it->second.back().GetTask()->PortionsToRemove.emplace(info.GetAddress(), info); + AFL_VERIFY(!it->second.back().GetTask()->GetPortionsToEvictCount())("rw", features.GetRWAddress().DebugString())("f", it->first.DebugString()); + } else { + if (!dWait) { + AFL_VERIFY(features.GetCurrentScheme()->GetVersion() < 
features.GetTargetScheme()->GetVersion()); + } else { + Counters.OnPortionToEvict(info.GetTotalBlobBytes(), *dWait); + } + it->second.back().GetTask()->AddPortionToEvict(info, std::move(features)); + AFL_VERIFY(it->second.back().GetTask()->PortionsToRemove.empty())("rw", features.GetRWAddress().DebugString())("f", it->first.DebugString()); + } + return true; +} + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/changes/actualization/construction/context.h b/ydb/core/tx/columnshard/engines/changes/actualization/construction/context.h new file mode 100644 index 000000000000..0e5a5e326d3a --- /dev/null +++ b/ydb/core/tx/columnshard/engines/changes/actualization/construction/context.h @@ -0,0 +1,62 @@ +#pragma once +#include +#include +#include +#include +#include +#include + +#include + +namespace NKikimr::NOlap::NActualizer { + +class TTaskConstructor { +private: + YDB_READONLY_DEF(std::shared_ptr, MemoryPredictor); + YDB_READONLY_DEF(std::shared_ptr, Task); + YDB_ACCESSOR(ui64, MemoryUsage, 0); + YDB_ACCESSOR(ui64, TxWriteVolume, 0); +public: + TTaskConstructor(const std::shared_ptr& predictor, const std::shared_ptr& task) + : MemoryPredictor(predictor) + , Task(task) { + + } +}; + +class TTieringProcessContext { +private: + THashSet UsedPortions; + const ui64 MemoryUsageLimit; + TSaverContext SaverContext; + THashMap> Tasks; + const NColumnShard::TEngineLogsCounters Counters; + std::shared_ptr Controller; +public: + const std::shared_ptr DataLocksManager; + const TInstant Now = AppDataVerified().TimeProvider->Now(); + + const NColumnShard::TEngineLogsCounters GetCounters() const { + return Counters; + } + + const THashMap>& GetTasks() const { + return Tasks; + } + + bool AddPortion(const TPortionInfo& info, TPortionEvictionFeatures&& features, const std::optional dWait); + + bool IsRWAddressAvailable(const TRWAddress& address) const { + auto it = Tasks.find(address); + if (it == Tasks.end()) { + return 
Controller->IsNewTaskAvailable(address, 0); + } else { + return Controller->IsNewTaskAvailable(address, it->second.size()); + } + } + + TTieringProcessContext(const ui64 memoryUsageLimit, const TSaverContext& saverContext, const std::shared_ptr& dataLocksManager, + const NColumnShard::TEngineLogsCounters& counters, const std::shared_ptr& controller); +}; + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/changes/actualization/construction/ya.make b/ydb/core/tx/columnshard/engines/changes/actualization/construction/ya.make new file mode 100644 index 000000000000..12df95df6331 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/changes/actualization/construction/ya.make @@ -0,0 +1,11 @@ +LIBRARY() + +SRCS( + context.cpp +) + +PEERDIR( + ydb/core/tx/columnshard/engines/changes/abstract +) + +END() diff --git a/ydb/core/tx/columnshard/engines/changes/actualization/controller/controller.cpp b/ydb/core/tx/columnshard/engines/changes/actualization/controller/controller.cpp new file mode 100644 index 000000000000..672e02fd71f4 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/changes/actualization/controller/controller.cpp @@ -0,0 +1,15 @@ +#include "controller.h" + +namespace NKikimr::NOlap::NActualizer { + +ui32 TController::GetLimitForAddress(const NActualizer::TRWAddress& address) const { + if (address.WriteIs(NTiering::NCommon::DeleteTierName)) { + return 16; + } else if (address.ReadIs(IStoragesManager::DefaultStorageId) && address.WriteIs(IStoragesManager::DefaultStorageId)) { + return 16; + } else { + return 1; + } +} + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/changes/actualization/controller/controller.h b/ydb/core/tx/columnshard/engines/changes/actualization/controller/controller.h new file mode 100644 index 000000000000..bae5930bec6a --- /dev/null +++ b/ydb/core/tx/columnshard/engines/changes/actualization/controller/controller.h @@ -0,0 +1,31 @@ +#pragma once +#include +#include + +namespace 
NKikimr::NOlap::NActualizer {
+
+class TController { // Counts actualizations in progress per TRWAddress and checks them against a per-address limit.
+private:
+    THashMap ActualizationsInProgress; // address -> number of started and not yet finished actualizations
+    ui32 GetLimitForAddress(const NActualizer::TRWAddress& address) const; // limit the counters below are compared against
+
+public:
+    void StartActualization(const NActualizer::TRWAddress& address) { // registers one more actualization for the address
+        AFL_VERIFY(++ActualizationsInProgress[address] <= (i32)GetLimitForAddress(address)); // operator[] creates the entry on first use; the incremented counter must not exceed the limit
+    }
+
+    void FinishActualization(const NActualizer::TRWAddress& address) { // unregisters one actualization for the address
+        AFL_VERIFY(--ActualizationsInProgress[address] >= 0); // verifies there was a matching start; the entry is kept even when it drops to zero
+    }
+
+    bool IsNewTaskAvailable(const NActualizer::TRWAddress& address, const ui32 readyTemporaryTasks) const { // true if (in progress + readyTemporaryTasks) is still below the limit
+        auto it = ActualizationsInProgress.find(address);
+        if (it == ActualizationsInProgress.end()) { // nothing in progress for this address: only the caller-supplied count matters
+            return readyTemporaryTasks < GetLimitForAddress(address);
+        } else {
+            return it->second + readyTemporaryTasks < GetLimitForAddress(address); // NOTE(review): mixes the stored counter with ui32 operands; relies on the counter never being negative - confirm the map's value type
+        }
+    }
+};
+
+}
\ No newline at end of file
diff --git a/ydb/core/tx/columnshard/engines/changes/actualization/controller/ya.make b/ydb/core/tx/columnshard/engines/changes/actualization/controller/ya.make
new file mode 100644
index 000000000000..e92d5a71cdd6
--- /dev/null
+++ b/ydb/core/tx/columnshard/engines/changes/actualization/controller/ya.make
@@ -0,0 +1,11 @@
+LIBRARY()
+
+SRCS(
+    controller.cpp
+)
+
+PEERDIR(
+    ydb/core/tx/columnshard/engines/changes/abstract
+)
+
+END()
diff --git a/ydb/core/tx/columnshard/engines/changes/actualization/ya.make b/ydb/core/tx/columnshard/engines/changes/actualization/ya.make
new file mode 100644
index 000000000000..4362b8dffa5e
--- /dev/null
+++ b/ydb/core/tx/columnshard/engines/changes/actualization/ya.make
@@ -0,0 +1,8 @@
+LIBRARY()
+
+PEERDIR(
+    ydb/core/tx/columnshard/engines/changes/actualization/construction
+    ydb/core/tx/columnshard/engines/changes/actualization/controller
+)
+
+END()
diff --git a/ydb/core/tx/columnshard/engines/changes/cleanup.cpp b/ydb/core/tx/columnshard/engines/changes/cleanup.cpp
deleted file mode 100644
index e99f5730913e..000000000000
--- 
a/ydb/core/tx/columnshard/engines/changes/cleanup.cpp +++ /dev/null @@ -1,62 +0,0 @@ -#include "cleanup.h" -#include -#include -#include -#include - -namespace NKikimr::NOlap { - -void TCleanupColumnEngineChanges::DoDebugString(TStringOutput& out) const { - if (ui32 dropped = PortionsToDrop.size()) { - out << "drop " << dropped << " portions"; - for (auto& portionInfo : PortionsToDrop) { - out << portionInfo.DebugString(); - } - } -} - -void TCleanupColumnEngineChanges::DoWriteIndex(NColumnShard::TColumnShard& self, TWriteIndexContext& context) { - self.IncCounter(NColumnShard::COUNTER_PORTIONS_ERASED, PortionsToDrop.size()); - THashSet pathIds; - for (auto&& p : PortionsToDrop) { - auto removing = BlobsAction.GetRemoving(p); - for (auto&& r : p.Records) { - removing->DeclareRemove(r.BlobRange.BlobId); - } - pathIds.emplace(p.GetPathId()); - self.IncCounter(NColumnShard::COUNTER_RAW_BYTES_ERASED, p.RawBytesSum()); - } - for (auto&& p: pathIds) { - self.TablesManager.TryFinalizeDropPath(context.Txc, p); - } -} - -bool TCleanupColumnEngineChanges::DoApplyChanges(TColumnEngineForLogs& self, TApplyChangesContext& context) { - for (auto& portionInfo : PortionsToDrop) { - if (!self.ErasePortion(portionInfo)) { - AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "Cannot erase portion")("portion", portionInfo.DebugString()); - continue; - } - portionInfo.RemoveFromDatabase(context.DB); - } - - return true; -} - -void TCleanupColumnEngineChanges::DoStart(NColumnShard::TColumnShard& self) { - self.BackgroundController.StartCleanup(); -} - -void TCleanupColumnEngineChanges::DoWriteIndexComplete(NColumnShard::TColumnShard& /*self*/, TWriteIndexCompleteContext& context) { - context.TriggerActivity = NeedRepeat ? 
NColumnShard::TBackgroundActivity::Cleanup() : NColumnShard::TBackgroundActivity::None(); -} - -void TCleanupColumnEngineChanges::DoOnFinish(NColumnShard::TColumnShard& self, TChangesFinishContext& /*context*/) { - self.BackgroundController.FinishCleanup(); -} - -NColumnShard::ECumulativeCounters TCleanupColumnEngineChanges::GetCounterIndex(const bool isSuccess) const { - return isSuccess ? NColumnShard::COUNTER_CLEANUP_SUCCESS : NColumnShard::COUNTER_CLEANUP_FAIL; -} - -} diff --git a/ydb/core/tx/columnshard/engines/changes/cleanup_portions.cpp b/ydb/core/tx/columnshard/engines/changes/cleanup_portions.cpp new file mode 100644 index 000000000000..3aa29ed01a13 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/changes/cleanup_portions.cpp @@ -0,0 +1,63 @@ +#include "cleanup_portions.h" +#include +#include +#include +#include + +namespace NKikimr::NOlap { + +void TCleanupPortionsColumnEngineChanges::DoDebugString(TStringOutput& out) const { + if (ui32 dropped = PortionsToDrop.size()) { + out << "drop " << dropped << " portions"; + for (auto& portionInfo : PortionsToDrop) { + out << portionInfo.DebugString(); + } + } +} + +void TCleanupPortionsColumnEngineChanges::DoWriteIndexOnExecute(NColumnShard::TColumnShard* self, TWriteIndexContext& context) { + THashSet pathIds; + if (self) { + THashMap> blobIdsByStorage; + for (auto&& p : PortionsToDrop) { + p.RemoveFromDatabase(context.DBWrapper); + + p.FillBlobIdsByStorage(blobIdsByStorage, context.EngineLogs.GetVersionedIndex()); + pathIds.emplace(p.GetPathId()); + } + for (auto&& i : blobIdsByStorage) { + auto action = BlobsAction.GetRemoving(i.first); + for (auto&& b : i.second) { + action->DeclareRemove((TTabletId)self->TabletID(), b); + } + } + } +} + +void TCleanupPortionsColumnEngineChanges::DoWriteIndexOnComplete(NColumnShard::TColumnShard* self, TWriteIndexCompleteContext& context) { + for (auto& portionInfo : PortionsToDrop) { + if (!context.EngineLogs.ErasePortion(portionInfo)) { + 
AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "Cannot erase portion")("portion", portionInfo.DebugString()); + } + } + if (self) { + self->IncCounter(NColumnShard::COUNTER_PORTIONS_ERASED, PortionsToDrop.size()); + for (auto&& p : PortionsToDrop) { + self->IncCounter(NColumnShard::COUNTER_RAW_BYTES_ERASED, p.GetTotalRawBytes()); + } + } +} + +void TCleanupPortionsColumnEngineChanges::DoStart(NColumnShard::TColumnShard& self) { + self.BackgroundController.StartCleanupPortions(); +} + +void TCleanupPortionsColumnEngineChanges::DoOnFinish(NColumnShard::TColumnShard& self, TChangesFinishContext& /*context*/) { + self.BackgroundController.FinishCleanupPortions(); +} + +NColumnShard::ECumulativeCounters TCleanupPortionsColumnEngineChanges::GetCounterIndex(const bool isSuccess) const { + return isSuccess ? NColumnShard::COUNTER_CLEANUP_SUCCESS : NColumnShard::COUNTER_CLEANUP_FAIL; +} + +} diff --git a/ydb/core/tx/columnshard/engines/changes/cleanup.h b/ydb/core/tx/columnshard/engines/changes/cleanup_portions.h similarity index 63% rename from ydb/core/tx/columnshard/engines/changes/cleanup.h rename to ydb/core/tx/columnshard/engines/changes/cleanup_portions.h index b5c47f81fc1e..f4addb74f048 100644 --- a/ydb/core/tx/columnshard/engines/changes/cleanup.h +++ b/ydb/core/tx/columnshard/engines/changes/cleanup_portions.h @@ -3,18 +3,18 @@ namespace NKikimr::NOlap { -class TCleanupColumnEngineChanges: public TColumnEngineChanges { +class TCleanupPortionsColumnEngineChanges: public TColumnEngineChanges { private: using TBase = TColumnEngineChanges; THashMap> BlobsToForget; THashMap>> StoragePortions; protected: + virtual void DoWriteIndexOnComplete(NColumnShard::TColumnShard* self, TWriteIndexCompleteContext& context) override; + virtual void DoWriteIndexOnExecute(NColumnShard::TColumnShard* self, TWriteIndexContext& context) override; + virtual void DoStart(NColumnShard::TColumnShard& self) override; virtual void DoOnFinish(NColumnShard::TColumnShard& self, 
TChangesFinishContext& context) override; - virtual bool DoApplyChanges(TColumnEngineForLogs& self, TApplyChangesContext& context) override; virtual void DoDebugString(TStringOutput& out) const override; - virtual void DoWriteIndexComplete(NColumnShard::TColumnShard& self, TWriteIndexCompleteContext& context) override; - virtual void DoWriteIndex(NColumnShard::TColumnShard& self, TWriteIndexContext& context) override; virtual void DoCompile(TFinalizationContext& /*context*/) override { } virtual TConclusionStatus DoConstructBlobs(TConstructionContext& /*context*/) noexcept override { @@ -27,23 +27,17 @@ class TCleanupColumnEngineChanges: public TColumnEngineChanges { virtual ui64 DoCalcMemoryForUsage() const override { return 0; } -public: - TCleanupColumnEngineChanges(const std::shared_ptr& storagesManager) - : TBase(storagesManager, StaticTypeName()) { - + virtual std::shared_ptr DoBuildDataLock() const override { + return std::make_shared(TypeString() + "::" + GetTaskIdentifier(), PortionsToDrop); } +public: + TCleanupPortionsColumnEngineChanges(const std::shared_ptr& storagesManager) + : TBase(storagesManager, NBlobOperations::EConsumer::CLEANUP_PORTIONS) { - virtual THashSet GetTouchedPortions() const override { - THashSet result; - for (const auto& portionInfo : PortionsToDrop) { - result.emplace(portionInfo.GetAddress()); - } - return result; } std::vector PortionsToDrop; - bool NeedRepeat = false; virtual ui32 GetWritePortionsCount() const override { return 0; @@ -52,11 +46,11 @@ class TCleanupColumnEngineChanges: public TColumnEngineChanges { return nullptr; } virtual bool NeedWritePortion(const ui32 /*index*/) const override { - return true; + return false; } static TString StaticTypeName() { - return "CS::CLEANUP"; + return "CS::CLEANUP::PORTIONS"; } virtual TString TypeString() const override { diff --git a/ydb/core/tx/columnshard/engines/changes/cleanup_tables.cpp b/ydb/core/tx/columnshard/engines/changes/cleanup_tables.cpp new file mode 100644 index 
000000000000..e600be9d6ef0 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/changes/cleanup_tables.cpp @@ -0,0 +1,42 @@ +#include "cleanup_tables.h" +#include +#include +#include +#include +#include + +namespace NKikimr::NOlap { + +void TCleanupTablesColumnEngineChanges::DoDebugString(TStringOutput& out) const { + if (ui32 dropped = TablesToDrop.size()) { + out << "drop " << dropped << " tables: " << JoinSeq(",", TablesToDrop) << ";"; + } +} + +void TCleanupTablesColumnEngineChanges::DoWriteIndexOnExecute(NColumnShard::TColumnShard* self, TWriteIndexContext& context) { + if (self && context.DB) { + for (auto&& t : TablesToDrop) { + self->TablesManager.TryFinalizeDropPathOnExecute(*context.DB, t); + } + } +} + +void TCleanupTablesColumnEngineChanges::DoWriteIndexOnComplete(NColumnShard::TColumnShard* self, TWriteIndexCompleteContext& /*context*/) { + for (auto&& t : TablesToDrop) { + self->TablesManager.TryFinalizeDropPathOnComplete(t); + } +} + +void TCleanupTablesColumnEngineChanges::DoStart(NColumnShard::TColumnShard& self) { + self.BackgroundController.StartCleanupTables(); +} + +void TCleanupTablesColumnEngineChanges::DoOnFinish(NColumnShard::TColumnShard& self, TChangesFinishContext& /*context*/) { + self.BackgroundController.FinishCleanupTables(); +} + +NColumnShard::ECumulativeCounters TCleanupTablesColumnEngineChanges::GetCounterIndex(const bool isSuccess) const { + return isSuccess ? 
NColumnShard::COUNTER_CLEANUP_SUCCESS : NColumnShard::COUNTER_CLEANUP_FAIL; +} + +} diff --git a/ydb/core/tx/columnshard/engines/changes/cleanup_tables.h b/ydb/core/tx/columnshard/engines/changes/cleanup_tables.h new file mode 100644 index 000000000000..5f5b7a9bd00b --- /dev/null +++ b/ydb/core/tx/columnshard/engines/changes/cleanup_tables.h @@ -0,0 +1,59 @@ +#pragma once +#include "abstract/abstract.h" + +namespace NKikimr::NOlap { + +class TCleanupTablesColumnEngineChanges: public TColumnEngineChanges { +private: + using TBase = TColumnEngineChanges; +protected: + virtual void DoWriteIndexOnComplete(NColumnShard::TColumnShard* self, TWriteIndexCompleteContext& context) override; + virtual void DoWriteIndexOnExecute(NColumnShard::TColumnShard* self, TWriteIndexContext& context) override; + + virtual void DoStart(NColumnShard::TColumnShard& self) override; + virtual void DoOnFinish(NColumnShard::TColumnShard& self, TChangesFinishContext& context) override; + virtual void DoDebugString(TStringOutput& out) const override; + virtual void DoCompile(TFinalizationContext& /*context*/) override { + } + virtual TConclusionStatus DoConstructBlobs(TConstructionContext& /*context*/) noexcept override { + return TConclusionStatus::Success(); + } + virtual bool NeedConstruction() const override { + return false; + } + virtual NColumnShard::ECumulativeCounters GetCounterIndex(const bool isSuccess) const override; + virtual ui64 DoCalcMemoryForUsage() const override { + return 0; + } + virtual std::shared_ptr DoBuildDataLock() const override { + return std::make_shared(TypeString() + "::" + GetTaskIdentifier(), TablesToDrop); + } + +public: + TCleanupTablesColumnEngineChanges(const std::shared_ptr& storagesManager) + : TBase(storagesManager, NBlobOperations::EConsumer::CLEANUP_TABLES) { + + } + + THashSet TablesToDrop; + + virtual ui32 GetWritePortionsCount() const override { + return 0; + } + virtual TPortionInfoWithBlobs* GetWritePortionInfo(const ui32 /*index*/) override { + 
return nullptr; + } + virtual bool NeedWritePortion(const ui32 /*index*/) const override { + return false; + } + + static TString StaticTypeName() { + return "CS::CLEANUP::TABLES"; + } + + virtual TString TypeString() const override { + return StaticTypeName(); + } +}; + +} diff --git a/ydb/core/tx/columnshard/engines/changes/compaction.cpp b/ydb/core/tx/columnshard/engines/changes/compaction.cpp index 6ad04f33711c..24e1e8c5c0e5 100644 --- a/ydb/core/tx/columnshard/engines/changes/compaction.cpp +++ b/ydb/core/tx/columnshard/engines/changes/compaction.cpp @@ -27,34 +27,34 @@ void TCompactColumnEngineChanges::DoCompile(TFinalizationContext& context) { } } -bool TCompactColumnEngineChanges::DoApplyChanges(TColumnEngineForLogs& self, TApplyChangesContext& context) { - return TBase::DoApplyChanges(self, context); -} - -void TCompactColumnEngineChanges::DoWriteIndex(NColumnShard::TColumnShard& self, TWriteIndexContext& context) { - TBase::DoWriteIndex(self, context); -} - void TCompactColumnEngineChanges::DoStart(NColumnShard::TColumnShard& self) { TBase::DoStart(self); Y_ABORT_UNLESS(SwitchedPortions.size()); + THashMap> blobRanges; + auto& index = self.GetIndexAs().GetVersionedIndex(); for (const auto& p : SwitchedPortions) { Y_ABORT_UNLESS(!p.Empty()); - auto action = BlobsAction.GetReading(p); - for (const auto& rec : p.Records) { - action->AddRange(rec.BlobRange); + p.FillBlobRangesByStorage(blobRanges, index); + } + + for (const auto& p : blobRanges) { + auto action = BlobsAction.GetReading(p.first); + for (auto&& b: p.second) { + action->AddRange(b); } } - self.BackgroundController.StartCompaction(NKikimr::NOlap::TPlanCompactionInfo(GranuleMeta->GetPathId()), *this); + self.BackgroundController.StartCompaction(NKikimr::NOlap::TPlanCompactionInfo(GranuleMeta->GetPathId())); NeedGranuleStatusProvide = true; GranuleMeta->OnCompactionStarted(); } -void TCompactColumnEngineChanges::DoWriteIndexComplete(NColumnShard::TColumnShard& self, TWriteIndexCompleteContext& 
context) { - TBase::DoWriteIndexComplete(self, context); - self.IncCounter(NColumnShard::COUNTER_COMPACTION_TIME, context.Duration.MilliSeconds()); +void TCompactColumnEngineChanges::DoWriteIndexOnComplete(NColumnShard::TColumnShard* self, TWriteIndexCompleteContext& context) { + TBase::DoWriteIndexOnComplete(self, context); + if (self) { + self->IncCounter(NColumnShard::COUNTER_COMPACTION_TIME, context.Duration.MilliSeconds()); + } } void TCompactColumnEngineChanges::DoOnFinish(NColumnShard::TColumnShard& self, TChangesFinishContext& context) { @@ -68,8 +68,8 @@ void TCompactColumnEngineChanges::DoOnFinish(NColumnShard::TColumnShard& self, T NeedGranuleStatusProvide = false; } -TCompactColumnEngineChanges::TCompactColumnEngineChanges(const TSplitSettings& splitSettings, std::shared_ptr granule, const std::vector>& portions, const TSaverContext& saverContext) - : TBase(splitSettings, saverContext, StaticTypeName()) +TCompactColumnEngineChanges::TCompactColumnEngineChanges(std::shared_ptr granule, const std::vector>& portions, const TSaverContext& saverContext) + : TBase(saverContext, NBlobOperations::EConsumer::GENERAL_COMPACTION) , GranuleMeta(granule) { Y_ABORT_UNLESS(GranuleMeta); @@ -87,12 +87,4 @@ TCompactColumnEngineChanges::~TCompactColumnEngineChanges() { Y_DEBUG_ABORT_UNLESS(!NActors::TlsActivationContext || !NeedGranuleStatusProvide); } -THashSet TCompactColumnEngineChanges::GetTouchedPortions() const { - THashSet result = TBase::GetTouchedPortions(); - for (auto&& i : SwitchedPortions) { - result.emplace(i.GetAddress()); - } - return result; -} - } diff --git a/ydb/core/tx/columnshard/engines/changes/compaction.h b/ydb/core/tx/columnshard/engines/changes/compaction.h index 7c033fdfbc0e..fc449e341459 100644 --- a/ydb/core/tx/columnshard/engines/changes/compaction.h +++ b/ydb/core/tx/columnshard/engines/changes/compaction.h @@ -14,23 +14,24 @@ class TCompactColumnEngineChanges: public TChangesWithAppend { protected: std::shared_ptr GranuleMeta; + virtual 
void DoWriteIndexOnComplete(NColumnShard::TColumnShard* self, TWriteIndexCompleteContext& context) override; + virtual void DoStart(NColumnShard::TColumnShard& self) override; - virtual void DoWriteIndexComplete(NColumnShard::TColumnShard& self, TWriteIndexCompleteContext& context) override; virtual void DoOnFinish(NColumnShard::TColumnShard& self, TChangesFinishContext& context) override; - virtual void DoWriteIndex(NColumnShard::TColumnShard& self, TWriteIndexContext& context) override; virtual void DoDebugString(TStringOutput& out) const override; virtual void DoCompile(TFinalizationContext& context) override; - virtual bool DoApplyChanges(TColumnEngineForLogs& self, TApplyChangesContext& context) override; virtual TPortionMeta::EProduced GetResultProducedClass() const = 0; virtual void OnAbortEmergency() override { NeedGranuleStatusProvide = false; } + virtual std::shared_ptr DoBuildDataLockImpl() const override { + return std::make_shared(TypeString() + "::" + GetTaskIdentifier(), SwitchedPortions); + } + public: std::vector SwitchedPortions; // Portions that would be replaced by new ones - virtual THashSet GetTouchedPortions() const override; - - TCompactColumnEngineChanges(const TSplitSettings& splitSettings, std::shared_ptr granule, const std::vector>& portions, const TSaverContext& saverContext); + TCompactColumnEngineChanges(std::shared_ptr granule, const std::vector>& portions, const TSaverContext& saverContext); ~TCompactColumnEngineChanges(); static TString StaticTypeName() { diff --git a/ydb/core/tx/columnshard/engines/changes/compaction/column_portion_chunk.cpp b/ydb/core/tx/columnshard/engines/changes/compaction/column_portion_chunk.cpp index 7b1dbdb36198..09eed586ac20 100644 --- a/ydb/core/tx/columnshard/engines/changes/compaction/column_portion_chunk.cpp +++ b/ydb/core/tx/columnshard/engines/changes/compaction/column_portion_chunk.cpp @@ -2,37 +2,25 @@ #include #include #include +#include namespace NKikimr::NOlap::NCompaction { -std::vector> 
TChunkPreparation::DoInternalSplitImpl(const TColumnSaver& saver, const std::shared_ptr& counters, const std::vector& splitSizes) const { - auto loader = SchemaInfo->GetColumnLoaderVerified(Record.ColumnId); - auto rb = NArrow::TStatusValidator::GetValid(loader->Apply(Data)); - - auto chunks = TSimpleSplitter(saver, counters).SplitBySizes(rb, Data, splitSizes); - std::vector> newChunks; - for (auto&& i : chunks) { - Y_ABORT_UNLESS(i.GetSlicedBatch()->num_columns() == 1); - newChunks.emplace_back(std::make_shared(saver.Apply(i.GetSlicedBatch()), i.GetSlicedBatch()->column(0), GetColumnId(), SchemaInfo)); - } - return newChunks; -} - std::shared_ptr TColumnPortion::AppendBlob(const TString& data, const TColumnRecord& columnChunk, ui32& remained) { -// if (CurrentPortionRecords + columnChunk.GetMeta().GetNumRowsVerified() <= Context.GetPortionRowsCountLimit() && -// columnChunk.GetMeta().GetRawBytesVerified() < Context.GetChunkRawBytesLimit() && +// if (CurrentPortionRecords + columnChunk.GetMeta().GetNumRows() <= Context.GetPortionRowsCountLimit() && +// columnChunk.GetMeta().GetRawBytes() < Context.GetChunkRawBytesLimit() && // data.size() < Context.GetChunkPackedBytesLimit() && -// columnChunk.GetMeta().GetRawBytesVerified() > Context.GetStorePackedChunkSizeLimit() && Context.GetSaver().IsHardPacker() && +// columnChunk.GetMeta().GetRawBytes() > Context.GetStorePackedChunkSizeLimit() && Context.GetSaver().IsHardPacker() && // Context.GetUseWholeChunksOptimization()) // { // NChanges::TGeneralCompactionCounters::OnFullBlobAppend(columnChunk.BlobRange.GetBlobSize()); // FlushBuffer(); // Chunks.emplace_back(std::make_shared(data, columnChunk, Context.GetSchemaInfo())); // PackedSize += Chunks.back()->GetPackedSize(); -// CurrentPortionRecords += columnChunk.GetMeta().GetNumRowsVerified(); +// CurrentPortionRecords += columnChunk.GetMeta().GetNumRows(); // return nullptr; // } else { - 
NChanges::TGeneralCompactionCounters::OnSplittedBlobAppend(columnChunk.BlobRange.GetBlobSize()); + NChanges::TGeneralCompactionCounters::OnSplittedBlobAppend(columnChunk.BlobRange.GetSize()); auto batch = NArrow::TStatusValidator::GetValid(Context.GetLoader()->Apply(data)); AFL_VERIFY(batch->num_columns() == 1); auto batchArray = batch->column(0); @@ -72,7 +60,7 @@ ui32 TColumnPortion::AppendSlice(const std::shared_ptr& a, const u bool TColumnPortion::FlushBuffer() { if (Builder->length()) { auto newArrayChunk = NArrow::TStatusValidator::GetValid(Builder->Finish()); - Chunks.emplace_back(std::make_shared(Context.GetSaver().Apply(newArrayChunk, Context.GetResultField()), newArrayChunk, Context.GetColumnId(), Context.GetSchemaInfo())); + Chunks.emplace_back(std::make_shared(Context.GetSaver().Apply(newArrayChunk, Context.GetResultField()), newArrayChunk, TChunkAddress(Context.GetColumnId(), 0), ColumnInfo)); Builder = Context.MakeBuilder(); CurrentChunkRawSize = 0; PredictedPackedBytes = 0; diff --git a/ydb/core/tx/columnshard/engines/changes/compaction/column_portion_chunk.h b/ydb/core/tx/columnshard/engines/changes/compaction/column_portion_chunk.h index aa3a15ddceef..f1d4cbadd6cf 100644 --- a/ydb/core/tx/columnshard/engines/changes/compaction/column_portion_chunk.h +++ b/ydb/core/tx/columnshard/engines/changes/compaction/column_portion_chunk.h @@ -10,102 +10,6 @@ namespace NKikimr::NOlap::NCompaction { -class TChunkPreparation: public IPortionColumnChunk { -private: - using TBase = IPortionColumnChunk; - TString Data; - TColumnRecord Record; - ISnapshotSchema::TPtr SchemaInfo; - std::shared_ptr First; - std::shared_ptr Last; -protected: - virtual std::vector> DoInternalSplitImpl(const TColumnSaver& saver, const std::shared_ptr& counters, const std::vector& splitSizes) const override; - virtual const TString& DoGetData() const override { - return Data; - } - virtual ui32 DoGetRecordsCountImpl() const override { - return Record.GetMeta().GetNumRowsVerified(); - } - 
virtual TString DoDebugString() const override { - return ""; - } - virtual TSimpleChunkMeta DoBuildSimpleChunkMeta() const override { - return Record.GetMeta(); - } - virtual std::shared_ptr DoGetFirstScalar() const override { - return First; - } - virtual std::shared_ptr DoGetLastScalar() const override { - return Last; - } - -public: - const TColumnRecord& GetRecord() const { - return Record; - } - - TChunkPreparation(const TString& data, const TColumnRecord& columnChunk, ISnapshotSchema::TPtr schema) - : TBase(columnChunk.ColumnId) - , Data(data) - , Record(columnChunk) - , SchemaInfo(schema) { - Y_ABORT_UNLESS(Data.size() == Record.BlobRange.Size || columnChunk.BlobRange.Size == 0); - } - - TChunkPreparation(const TString& data, const std::shared_ptr& column, const ui32 columnId, ISnapshotSchema::TPtr schema) - : TBase(columnId) - , Data(data) - , Record(TChunkAddress(columnId, 0), column, schema->GetIndexInfo()) - , SchemaInfo(schema) { - Y_ABORT_UNLESS(column->length()); - First = NArrow::TStatusValidator::GetValid(column->GetScalar(0)); - Last = NArrow::TStatusValidator::GetValid(column->GetScalar(column->length() - 1)); - Record.BlobRange.Size = data.size(); - } -}; - -class TNullChunkPreparation: public IPortionColumnChunk { -private: - using TBase = IPortionColumnChunk; - const ui32 RecordsCount; - TString Data; -protected: - virtual std::vector> DoInternalSplitImpl(const TColumnSaver& /*saver*/, const std::shared_ptr& /*counters*/, - const std::vector& /*splitSizes*/) const override { - AFL_VERIFY(false); - return {}; - } - virtual const TString& DoGetData() const override { - return Data; - } - virtual ui32 DoGetRecordsCountImpl() const override { - return RecordsCount; - } - virtual TString DoDebugString() const override { - return TStringBuilder() << "rc=" << RecordsCount << ";data_size=" << Data.size() << ";"; - } - virtual TSimpleChunkMeta DoBuildSimpleChunkMeta() const override { - AFL_VERIFY(false); - return TSimpleChunkMeta(nullptr, false, 
false); - } - virtual std::shared_ptr DoGetFirstScalar() const override { - return nullptr; - } - virtual std::shared_ptr DoGetLastScalar() const override { - return nullptr; - } - -public: - TNullChunkPreparation(const ui32 columnId, const ui32 recordsCount, const std::shared_ptr& f, const TColumnSaver& saver) - : TBase(columnId) - , RecordsCount(recordsCount) - , Data(saver.Apply(NArrow::TThreadSimpleArraysCache::GetNull(f->type(), recordsCount), f)) - { - Y_ABORT_UNLESS(RecordsCount); - SetChunkIdx(0); - } -}; - class TColumnPortionResult { protected: std::vector> Chunks; @@ -143,10 +47,13 @@ class TColumnPortion: public TColumnPortionResult { const TColumnMergeContext& Context; YDB_READONLY(ui64, CurrentChunkRawSize, 0); double PredictedPackedBytes = 0; + const TSimpleColumnInfo ColumnInfo; public: TColumnPortion(const TColumnMergeContext& context) : TBase(context.GetColumnId()) - , Context(context) { + , Context(context) + , ColumnInfo(Context.GetIndexInfo().GetColumnFeaturesVerified(context.GetColumnId())) + { Builder = Context.MakeBuilder(); } diff --git a/ydb/core/tx/columnshard/engines/changes/compaction/merge_context.h b/ydb/core/tx/columnshard/engines/changes/compaction/merge_context.h index 834ca2293154..a5da857c2aff 100644 --- a/ydb/core/tx/columnshard/engines/changes/compaction/merge_context.h +++ b/ydb/core/tx/columnshard/engines/changes/compaction/merge_context.h @@ -42,10 +42,10 @@ class TColumnMergeContext { } TColumnMergeContext(const ui32 columnId, const ISnapshotSchema::TPtr& schema, const ui32 portionRowsCountLimit, const ui32 chunkRawBytesLimit, - const std::optional& columnStat, const TSaverContext& saverContext) + const std::optional& columnStat) : ColumnId(columnId) , SchemaInfo(schema) - , Saver(schema->GetColumnSaver(columnId, saverContext)) + , Saver(schema->GetColumnSaver(columnId)) , Loader(schema->GetColumnLoaderOptional(columnId)) , ResultField(schema->GetIndexInfo().GetColumnFieldVerified(columnId)) , 
PortionRowsCountLimit(portionRowsCountLimit) diff --git a/ydb/core/tx/columnshard/engines/changes/compaction/merged_column.cpp b/ydb/core/tx/columnshard/engines/changes/compaction/merged_column.cpp index ed03a8fcf523..5f638a30f155 100644 --- a/ydb/core/tx/columnshard/engines/changes/compaction/merged_column.cpp +++ b/ydb/core/tx/columnshard/engines/changes/compaction/merged_column.cpp @@ -3,7 +3,7 @@ namespace NKikimr::NOlap::NCompaction { void TMergedColumn::AppendBlob(const TString& data, const TColumnRecord& columnChunk) { - RecordsCount += columnChunk.GetMeta().GetNumRowsVerified(); + RecordsCount += columnChunk.GetMeta().GetNumRows(); ui32 remained; std::shared_ptr dataArray = Portions.back().AppendBlob(data, columnChunk, remained); while (remained) { diff --git a/ydb/core/tx/columnshard/engines/changes/general_compaction.cpp b/ydb/core/tx/columnshard/engines/changes/general_compaction.cpp index 2b47f9b8a736..14dad1e76bb8 100644 --- a/ydb/core/tx/columnshard/engines/changes/general_compaction.cpp +++ b/ydb/core/tx/columnshard/engines/changes/general_compaction.cpp @@ -11,20 +11,21 @@ #include #include #include +#include #include -#include +#include +#include namespace NKikimr::NOlap::NCompaction { -void TGeneralCompactColumnEngineChanges::BuildAppendedPortionsByFullBatches(TConstructionContext& context) noexcept { - std::vector portions = TPortionInfoWithBlobs::RestorePortions(SwitchedPortions, Blobs); +void TGeneralCompactColumnEngineChanges::BuildAppendedPortionsByFullBatches(TConstructionContext& context, std::vector&& portions) noexcept { std::vector> batchResults; auto resultSchema = context.SchemaVersions.GetLastSchema(); { auto resultDataSchema = resultSchema->GetIndexInfo().ArrowSchemaWithSpecials(); - NIndexedReader::TMergePartialStream mergeStream(resultSchema->GetIndexInfo().GetReplaceKey(), resultDataSchema, false); + NArrow::NMerger::TMergePartialStream mergeStream(resultSchema->GetIndexInfo().GetReplaceKey(), resultDataSchema, false, 
IIndexInfo::GetSpecialColumnNames()); for (auto&& i : portions) { - auto dataSchema = context.SchemaVersions.GetSchema(i.GetPortionInfo().GetMinSnapshot()); + auto dataSchema = i.GetPortionInfo().GetSchema(context.SchemaVersions); auto batch = i.GetBatch(dataSchema, *resultSchema); batch = resultSchema->NormalizeBatch(*dataSchema, batch); Y_DEBUG_ABORT_UNLESS(NArrow::IsSortedAndUnique(batch, resultSchema->GetIndexInfo().GetReplaceKey())); @@ -34,7 +35,7 @@ void TGeneralCompactColumnEngineChanges::BuildAppendedPortionsByFullBatches(TCon } Y_ABORT_UNLESS(batchResults.size()); for (auto&& b : batchResults) { - auto portions = MakeAppendedPortions(b, GranuleMeta->GetPathId(), resultSchema->GetSnapshot(), GranuleMeta.get(), context); + auto portions = MakeAppendedPortions(b, GranuleMeta->GetPathId(), resultSchema->GetSnapshot(), GranuleMeta.get(), context, {}); Y_ABORT_UNLESS(portions.size()); for (auto& portion : portions) { AppendedPortions.emplace_back(std::move(portion)); @@ -42,14 +43,14 @@ void TGeneralCompactColumnEngineChanges::BuildAppendedPortionsByFullBatches(TCon } } -void TGeneralCompactColumnEngineChanges::BuildAppendedPortionsByChunks(TConstructionContext& context) noexcept { - std::vector portions = TPortionInfoWithBlobs::RestorePortions(SwitchedPortions, Blobs); +void TGeneralCompactColumnEngineChanges::BuildAppendedPortionsByChunks(TConstructionContext& context, std::vector&& portions) noexcept { static const TString portionIdFieldName = "$$__portion_id"; static const TString portionRecordIndexFieldName = "$$__portion_record_idx"; static const std::shared_ptr portionIdField = std::make_shared(portionIdFieldName, std::make_shared()); static const std::shared_ptr portionRecordIndexField = std::make_shared(portionRecordIndexFieldName, std::make_shared()); auto resultSchema = context.SchemaVersions.GetLastSchema(); + std::vector pkFieldNames = resultSchema->GetIndexInfo().GetReplaceKey()->field_names(); std::set pkFieldNamesSet(pkFieldNames.begin(), 
pkFieldNames.end()); for (auto&& i : TIndexInfo::GetSpecialColumnNames()) { @@ -65,10 +66,10 @@ void TGeneralCompactColumnEngineChanges::BuildAppendedPortionsByChunks(TConstruc indexFields.emplace_back(i); } auto dataSchema = std::make_shared(indexFields); - NIndexedReader::TMergePartialStream mergeStream(resultSchema->GetIndexInfo().GetReplaceKey(), dataSchema, false); + NArrow::NMerger::TMergePartialStream mergeStream(resultSchema->GetIndexInfo().GetReplaceKey(), dataSchema, false, IIndexInfo::GetSpecialColumnNames()); ui32 idx = 0; for (auto&& i : portions) { - auto dataSchema = context.SchemaVersions.GetSchema(i.GetPortionInfo().GetMinSnapshot()); + auto dataSchema = i.GetPortionInfo().GetSchema(context.SchemaVersions); auto batch = i.GetBatch(dataSchema, *resultSchema, pkFieldNamesSet); { NArrow::NConstruction::IArrayBuilder::TPtr column = std::make_shared>>(portionIdFieldName, idx++); @@ -99,14 +100,14 @@ void TGeneralCompactColumnEngineChanges::BuildAppendedPortionsByChunks(TConstruc std::vector cursors; for (auto&& p : portions) { - auto dataSchema = context.SchemaVersions.GetSchema(p.GetPortionInfo().GetMinSnapshot()); + auto dataSchema = p.GetPortionInfo().GetSchema(context.SchemaVersions); auto loader = dataSchema->GetColumnLoaderOptional(columnId); std::vector records; std::vector> chunks; if (!p.ExtractColumnChunks(columnId, records, chunks)) { AFL_VERIFY(!loader); records = {nullptr}; - chunks.emplace_back(std::make_shared(columnId, p.GetPortionInfo().GetRecordsCount(), resultField, resultSchema->GetColumnSaver(columnId, SaverContext))); + chunks.emplace_back(std::make_shared(columnId, p.GetPortionInfo().GetRecordsCount(), resultField, resultSchema->GetColumnSaver(columnId))); loader = resultSchema->GetColumnLoaderVerified(columnId); } AFL_VERIFY(!!loader); @@ -118,8 +119,8 @@ void TGeneralCompactColumnEngineChanges::BuildAppendedPortionsByChunks(TConstruc std::map> columnChunks; ui32 batchIdx = 0; for (auto&& batchResult : batchResults) { - const 
ui32 portionRecordsCountLimit = batchResult->num_rows() / (batchResult->num_rows() / GetSplitSettings().GetExpectedRecordsCountOnPage() + 1) + 1; - TColumnMergeContext context(columnId, resultSchema, portionRecordsCountLimit, GetSplitSettings().GetExpectedUnpackColumnChunkRawSize(), columnInfo, SaverContext); + const ui32 portionRecordsCountLimit = batchResult->num_rows() / (batchResult->num_rows() / NSplitter::TSplitSettings().GetExpectedRecordsCountOnPage() + 1) + 1; + TColumnMergeContext context(columnId, resultSchema, portionRecordsCountLimit, NSplitter::TSplitSettings().GetExpectedUnpackColumnChunkRawSize(), columnInfo); TMergedColumn mColumn(context); auto columnPortionIdx = batchResult->GetColumnByName(portionIdFieldName); @@ -159,6 +160,8 @@ void TGeneralCompactColumnEngineChanges::BuildAppendedPortionsByChunks(TConstruc } ui32 batchIdx = 0; + + const auto groups = resultSchema->GetIndexInfo().GetEntityGroupsByStorageId(IStoragesManager::DefaultStorageId, *SaverContext.GetStoragesManager()); for (auto&& columnChunks : chunkGroups) { auto batchResult = batchResults[batchIdx]; ++batchIdx; @@ -174,28 +177,29 @@ void TGeneralCompactColumnEngineChanges::BuildAppendedPortionsByChunks(TConstruc } std::vector batchSlices; - std::shared_ptr schemaDetails(new TDefaultSchemaDetails(resultSchema, SaverContext, stats)); + std::shared_ptr schemaDetails(new TDefaultSchemaDetails(resultSchema, stats)); for (ui32 i = 0; i < columnChunks.begin()->second.size(); ++i) { - std::map>> portionColumns; + THashMap>> portionColumns; for (auto&& p : columnChunks) { portionColumns.emplace(p.first, p.second[i].GetChunks()); } resultSchema->GetIndexInfo().AppendIndexes(portionColumns); - batchSlices.emplace_back(portionColumns, schemaDetails, context.Counters.SplitterCounters, GetSplitSettings()); + batchSlices.emplace_back(portionColumns, schemaDetails, context.Counters.SplitterCounters); } - TSimilarSlicer slicer(GetSplitSettings().GetExpectedPortionSize()); + TSimilarPacker 
slicer(NSplitter::TSplitSettings().GetExpectedPortionSize()); auto packs = slicer.Split(batchSlices); ui32 recordIdx = 0; for (auto&& i : packs) { TGeneralSerializedSlice slice(std::move(i)); auto b = batchResult->Slice(recordIdx, slice.GetRecordsCount()); - std::vector>> chunksByBlobs = slice.GroupChunksByBlobs(); - AppendedPortions.emplace_back(TPortionInfoWithBlobs::BuildByBlobs(chunksByBlobs, nullptr, GranuleMeta->GetPathId(), resultSchema->GetSnapshot(), SaverContext.GetStorageOperator())); + AppendedPortions.emplace_back(TPortionInfoWithBlobs::BuildByBlobs(slice.GroupChunksByBlobs(groups), nullptr, GranuleMeta->GetPathId(), + resultSchema->GetVersion(), resultSchema->GetSnapshot(), SaverContext.GetStoragesManager())); + AppendedPortions.back().FillStatistics(resultSchema->GetIndexInfo()); NArrow::TFirstLastSpecialKeys primaryKeys(slice.GetFirstLastPKBatch(resultSchema->GetIndexInfo().GetReplaceKey())); NArrow::TMinMaxSpecialKeys snapshotKeys(b, TIndexInfo::ArrowSchemaSnapshot()); - AppendedPortions.back().GetPortionInfo().AddMetadata(*resultSchema, primaryKeys, snapshotKeys, SaverContext.GetTierName()); + AppendedPortions.back().GetPortionInfo().AddMetadata(*resultSchema, primaryKeys, snapshotKeys, IStoragesManager::DefaultStorageId); recordIdx += slice.GetRecordsCount(); } } @@ -209,22 +213,25 @@ TConclusionStatus TGeneralCompactColumnEngineChanges::DoConstructBlobs(TConstruc i64 otherPortionsSize = 0; for (auto&& i : SwitchedPortions) { if (i.GetMeta().GetProduced() == TPortionMeta::EProduced::INSERTED) { - insertedPortionsSize += i.GetBlobBytes(); + insertedPortionsSize += i.GetTotalBlobBytes(); } else if (i.GetMeta().GetProduced() == TPortionMeta::EProduced::SPLIT_COMPACTED) { - compactedPortionsSize += i.GetBlobBytes(); + compactedPortionsSize += i.GetTotalBlobBytes(); } else { - otherPortionsSize += i.GetBlobBytes(); + otherPortionsSize += i.GetTotalBlobBytes(); } - portionsSize += i.GetBlobBytes(); + portionsSize += i.GetTotalBlobBytes(); 
++portionsCount; } NChanges::TGeneralCompactionCounters::OnPortionsKind(insertedPortionsSize, compactedPortionsSize, otherPortionsSize); NChanges::TGeneralCompactionCounters::OnRepackPortions(portionsCount, portionsSize); - if (!HasAppData() || AppDataVerified().ColumnShardConfig.GetUseChunkedMergeOnCompaction()) { - BuildAppendedPortionsByChunks(context); - } else { - BuildAppendedPortionsByFullBatches(context); + { + std::vector portions = TPortionInfoWithBlobs::RestorePortions(SwitchedPortions, Blobs, context.SchemaVersions, SaverContext.GetStoragesManager()); + if (!HasAppData() || AppDataVerified().ColumnShardConfig.GetUseChunkedMergeOnCompaction()) { + BuildAppendedPortionsByChunks(context, std::move(portions)); + } else { + BuildAppendedPortionsByFullBatches(context, std::move(portions)); + } } if (IS_DEBUG_LOG_ENABLED(NKikimrServices::TX_COLUMNSHARD)) { @@ -246,24 +253,26 @@ TConclusionStatus TGeneralCompactColumnEngineChanges::DoConstructBlobs(TConstruc return TConclusionStatus::Success(); } -void TGeneralCompactColumnEngineChanges::DoWriteIndexComplete(NColumnShard::TColumnShard& self, TWriteIndexCompleteContext& context) { - TBase::DoWriteIndexComplete(self, context); - self.IncCounter(context.FinishedSuccessfully ? NColumnShard::COUNTER_SPLIT_COMPACTION_SUCCESS : NColumnShard::COUNTER_SPLIT_COMPACTION_FAIL); - self.IncCounter(NColumnShard::COUNTER_SPLIT_COMPACTION_BLOBS_WRITTEN, context.BlobsWritten); - self.IncCounter(NColumnShard::COUNTER_SPLIT_COMPACTION_BYTES_WRITTEN, context.BytesWritten); +void TGeneralCompactColumnEngineChanges::DoWriteIndexOnComplete(NColumnShard::TColumnShard* self, TWriteIndexCompleteContext& context) { + TBase::DoWriteIndexOnComplete(self, context); + if (self) { + self->IncCounter(context.FinishedSuccessfully ? 
NColumnShard::COUNTER_SPLIT_COMPACTION_SUCCESS : NColumnShard::COUNTER_SPLIT_COMPACTION_FAIL); + self->IncCounter(NColumnShard::COUNTER_SPLIT_COMPACTION_BLOBS_WRITTEN, context.BlobsWritten); + self->IncCounter(NColumnShard::COUNTER_SPLIT_COMPACTION_BYTES_WRITTEN, context.BytesWritten); + } } void TGeneralCompactColumnEngineChanges::DoStart(NColumnShard::TColumnShard& self) { TBase::DoStart(self); auto& g = *GranuleMeta; - self.CSCounters.OnSplitCompactionInfo(g.GetAdditiveSummary().GetCompacted().GetPortionsSize(), g.GetAdditiveSummary().GetCompacted().GetPortionsCount()); + self.CSCounters.OnSplitCompactionInfo(g.GetAdditiveSummary().GetCompacted().GetTotalPortionsSize(), g.GetAdditiveSummary().GetCompacted().GetPortionsCount()); } NColumnShard::ECumulativeCounters TGeneralCompactColumnEngineChanges::GetCounterIndex(const bool isSuccess) const { return isSuccess ? NColumnShard::COUNTER_COMPACTION_SUCCESS : NColumnShard::COUNTER_COMPACTION_FAIL; } -void TGeneralCompactColumnEngineChanges::AddCheckPoint(const NIndexedReader::TSortableBatchPosition& position, const bool include, const bool validationDuplications) { +void TGeneralCompactColumnEngineChanges::AddCheckPoint(const NArrow::NMerger::TSortableBatchPosition& position, const bool include, const bool validationDuplications) { AFL_VERIFY(CheckPoints.emplace(position, include).second || !validationDuplications); } @@ -283,10 +292,10 @@ ui64 TGeneralCompactColumnEngineChanges::TMemoryPredictorChunkedPolicy::AddPorti SumMemoryFix += i.BlobRange.Size; auto it = maxChunkSizeByColumn.find(i.GetColumnId()); if (it == maxChunkSizeByColumn.end()) { - maxChunkSizeByColumn.emplace(i.GetColumnId(), i.GetMeta().GetRawBytesVerified()); + maxChunkSizeByColumn.emplace(i.GetColumnId(), i.GetMeta().GetRawBytes()); } else { - if (it->second < i.GetMeta().GetRawBytesVerified()) { - it->second = i.GetMeta().GetRawBytesVerified(); + if (it->second < i.GetMeta().GetRawBytes()) { + it->second = i.GetMeta().GetRawBytes(); } } } diff 
--git a/ydb/core/tx/columnshard/engines/changes/general_compaction.h b/ydb/core/tx/columnshard/engines/changes/general_compaction.h index e337d3cc3dbc..df583585d9cf 100644 --- a/ydb/core/tx/columnshard/engines/changes/general_compaction.h +++ b/ydb/core/tx/columnshard/engines/changes/general_compaction.h @@ -1,16 +1,16 @@ #pragma once #include "compaction.h" -#include +#include namespace NKikimr::NOlap::NCompaction { class TGeneralCompactColumnEngineChanges: public TCompactColumnEngineChanges { private: using TBase = TCompactColumnEngineChanges; - virtual void DoWriteIndexComplete(NColumnShard::TColumnShard& self, TWriteIndexCompleteContext& context) override; - std::map CheckPoints; - void BuildAppendedPortionsByFullBatches(TConstructionContext& context) noexcept; - void BuildAppendedPortionsByChunks(TConstructionContext& context) noexcept; + virtual void DoWriteIndexOnComplete(NColumnShard::TColumnShard* self, TWriteIndexCompleteContext& context) override; + std::map CheckPoints; + void BuildAppendedPortionsByFullBatches(TConstructionContext& context, std::vector&& portions) noexcept; + void BuildAppendedPortionsByChunks(TConstructionContext& context, std::vector&& portions) noexcept; protected: virtual TConclusionStatus DoConstructBlobs(TConstructionContext& context) noexcept override; virtual TPortionMeta::EProduced GetResultProducedClass() const override { @@ -36,7 +36,7 @@ class TGeneralCompactColumnEngineChanges: public TCompactColumnEngineChanges { virtual ui64 AddPortion(const TPortionInfo& portionInfo) override { for (auto&& i : portionInfo.GetRecords()) { SumMemory += i.BlobRange.Size; - SumMemory += 2 * i.GetMeta().GetRawBytesVerified(); + SumMemory += 2 * i.GetMeta().GetRawBytes(); } return SumMemory; } @@ -54,7 +54,7 @@ class TGeneralCompactColumnEngineChanges: public TCompactColumnEngineChanges { static std::shared_ptr BuildMemoryPredictor(); - void AddCheckPoint(const NIndexedReader::TSortableBatchPosition& position, const bool include = true, const 
bool validationDuplications = true); + void AddCheckPoint(const NArrow::NMerger::TSortableBatchPosition& position, const bool include = true, const bool validationDuplications = true); virtual TString TypeString() const override { return StaticTypeName(); diff --git a/ydb/core/tx/columnshard/engines/changes/indexation.cpp b/ydb/core/tx/columnshard/engines/changes/indexation.cpp index f91fcb099f42..ffbac159cc2a 100644 --- a/ydb/core/tx/columnshard/engines/changes/indexation.cpp +++ b/ydb/core/tx/columnshard/engines/changes/indexation.cpp @@ -4,24 +4,19 @@ #include #include #include +#include +#include +#include namespace NKikimr::NOlap { -bool TInsertColumnEngineChanges::DoApplyChanges(TColumnEngineForLogs& self, TApplyChangesContext& context) { - if (!TBase::DoApplyChanges(self, context)) { - return false; - } - return true; -} - -void TInsertColumnEngineChanges::DoWriteIndex(NColumnShard::TColumnShard& self, TWriteIndexContext& context) { - TBase::DoWriteIndex(self, context); - auto removing = BlobsAction.GetRemoving(IStoragesManager::DefaultStorageId); - for (const auto& insertedData : DataToIndex) { - self.InsertTable->EraseCommitted(context.DBWrapper, insertedData, removing); - } - if (!DataToIndex.empty()) { - self.UpdateInsertTableCounters(); +void TInsertColumnEngineChanges::DoWriteIndexOnExecute(NColumnShard::TColumnShard* self, TWriteIndexContext& context) { + TBase::DoWriteIndexOnExecute(self, context); + if (self) { + auto removing = BlobsAction.GetRemoving(IStoragesManager::DefaultStorageId); + for (const auto& insertedData : DataToIndex) { + self->InsertTable->EraseCommittedOnExecute(context.DBWrapper, insertedData, removing); + } } } @@ -30,16 +25,25 @@ void TInsertColumnEngineChanges::DoStart(NColumnShard::TColumnShard& self) { Y_ABORT_UNLESS(DataToIndex.size()); auto reading = BlobsAction.GetReading(IStoragesManager::DefaultStorageId); for (auto&& insertedData : DataToIndex) { - reading->AddRange(insertedData.GetBlobRange(), 
insertedData.GetBlobData().value_or("")); + reading->AddRange(insertedData.GetBlobRange(), insertedData.GetBlobData()); } self.BackgroundController.StartIndexing(*this); } -void TInsertColumnEngineChanges::DoWriteIndexComplete(NColumnShard::TColumnShard& self, TWriteIndexCompleteContext& context) { - self.IncCounter(NColumnShard::COUNTER_INDEXING_BLOBS_WRITTEN, context.BlobsWritten); - self.IncCounter(NColumnShard::COUNTER_INDEXING_BYTES_WRITTEN, context.BytesWritten); - self.IncCounter(NColumnShard::COUNTER_INDEXING_TIME, context.Duration.MilliSeconds()); +void TInsertColumnEngineChanges::DoWriteIndexOnComplete(NColumnShard::TColumnShard* self, TWriteIndexCompleteContext& context) { + TBase::DoWriteIndexOnComplete(self, context); + if (self) { + for (const auto& insertedData : DataToIndex) { + self->InsertTable->EraseCommittedOnComplete(insertedData); + } + if (!DataToIndex.empty()) { + self->UpdateInsertTableCounters(); + } + self->IncCounter(NColumnShard::COUNTER_INDEXING_BLOBS_WRITTEN, context.BlobsWritten); + self->IncCounter(NColumnShard::COUNTER_INDEXING_BYTES_WRITTEN, context.BytesWritten); + self->IncCounter(NColumnShard::COUNTER_INDEXING_TIME, context.Duration.MilliSeconds()); + } } void TInsertColumnEngineChanges::DoOnFinish(NColumnShard::TColumnShard& self, TChangesFinishContext& /*context*/) { @@ -73,12 +77,10 @@ TConclusionStatus TInsertColumnEngineChanges::DoConstructBlobs(TConstructionCont std::shared_ptr batch; { - auto itBlobData = Blobs.find(blobRange); - Y_ABORT_UNLESS(itBlobData != Blobs.end(), "Data for range %s has not been read", blobRange.ToString().c_str()); - Y_ABORT_UNLESS(!itBlobData->second.empty(), "Blob data not present"); + const auto blobData = Blobs.Extract(IStoragesManager::DefaultStorageId, blobRange); + Y_ABORT_UNLESS(blobData.size(), "Blob data not present"); // Prepare batch - batch = NArrow::DeserializeBatch(itBlobData->second, indexInfo.ArrowSchema()); - Blobs.erase(itBlobData); + batch = NArrow::DeserializeBatch(blobData, 
indexInfo.ArrowSchema()); AFL_VERIFY(batch)("event", "cannot_parse") ("data_snapshot", TStringBuilder() << inserted.GetSnapshot()) ("index_snapshot", TStringBuilder() << blobSchema->GetSnapshot()); @@ -91,10 +93,10 @@ TConclusionStatus TInsertColumnEngineChanges::DoConstructBlobs(TConstructionCont Y_DEBUG_ABORT_UNLESS(NArrow::IsSorted(pathBatches[inserted.PathId].back(), resultSchema->GetIndexInfo().GetReplaceKey())); } - Y_ABORT_UNLESS(Blobs.empty()); + Y_ABORT_UNLESS(Blobs.IsEmpty()); const std::vector comparableColumns = resultSchema->GetIndexInfo().GetReplaceKey()->field_names(); for (auto& [pathId, batches] : pathBatches) { - NIndexedReader::TMergePartialStream stream(resultSchema->GetIndexInfo().GetReplaceKey(), resultSchema->GetIndexInfo().ArrowSchemaWithSpecials(), false); + NArrow::NMerger::TMergePartialStream stream(resultSchema->GetIndexInfo().GetReplaceKey(), resultSchema->GetIndexInfo().ArrowSchemaWithSpecials(), false, IIndexInfo::GetSpecialColumnNames()); THashMap fieldSizes; ui64 rowsCount = 0; for (auto&& batch : batches) { @@ -105,23 +107,24 @@ TConclusionStatus TInsertColumnEngineChanges::DoConstructBlobs(TConstructionCont rowsCount += batch->num_rows(); } - NIndexedReader::TRecordBatchBuilder builder(resultSchema->GetIndexInfo().ArrowSchemaWithSpecials()->fields(), rowsCount, fieldSizes); + NArrow::NMerger::TRecordBatchBuilder builder(resultSchema->GetIndexInfo().ArrowSchemaWithSpecials()->fields(), rowsCount, fieldSizes); stream.SetPossibleSameVersion(true); stream.DrainAll(builder); auto itGranule = PathToGranule.find(pathId); AFL_VERIFY(itGranule != PathToGranule.end()); - std::vector> result = NIndexedReader::TSortableBatchPosition::SplitByBordersInSequentialContainer(builder.Finalize(), comparableColumns, itGranule->second); + std::vector> result = NArrow::NMerger::TSortableBatchPosition::SplitByBordersInSequentialContainer(builder.Finalize(), comparableColumns, itGranule->second); for (auto&& b : result) { if (!b) { continue; } + 
std::optional externalSaver; if (b->num_rows() < 100) { - SaverContext.SetExternalSerializer(NArrow::NSerialization::TSerializerContainer(std::make_shared(arrow::Compression::type::UNCOMPRESSED))); + externalSaver = NArrow::NSerialization::TSerializerContainer(std::make_shared(arrow::Compression::type::UNCOMPRESSED)); } else { - SaverContext.SetExternalSerializer(NArrow::NSerialization::TSerializerContainer(std::make_shared(arrow::Compression::type::LZ4_FRAME))); + externalSaver = NArrow::NSerialization::TSerializerContainer(std::make_shared(arrow::Compression::type::LZ4_FRAME)); } - auto portions = MakeAppendedPortions(b, pathId, maxSnapshot, nullptr, context); + auto portions = MakeAppendedPortions(b, pathId, maxSnapshot, nullptr, context, externalSaver); Y_ABORT_UNLESS(portions.size()); for (auto& portion : portions) { AppendedPortions.emplace_back(std::move(portion)); diff --git a/ydb/core/tx/columnshard/engines/changes/indexation.h b/ydb/core/tx/columnshard/engines/changes/indexation.h index 13c407c23d9b..95befd334c23 100644 --- a/ydb/core/tx/columnshard/engines/changes/indexation.h +++ b/ydb/core/tx/columnshard/engines/changes/indexation.h @@ -2,8 +2,8 @@ #include "abstract/abstract.h" #include "with_appended.h" #include -#include #include +#include namespace NKikimr::NOlap { @@ -14,11 +14,11 @@ class TInsertColumnEngineChanges: public TChangesWithAppend { const TIndexInfo& indexInfo, const TInsertedData& inserted) const; std::vector DataToIndex; protected: + virtual void DoWriteIndexOnComplete(NColumnShard::TColumnShard* self, TWriteIndexCompleteContext& context) override; + virtual void DoWriteIndexOnExecute(NColumnShard::TColumnShard* self, TWriteIndexContext& context) override; + virtual void DoStart(NColumnShard::TColumnShard& self) override; - virtual void DoWriteIndexComplete(NColumnShard::TColumnShard& self, TWriteIndexCompleteContext& context) override; - virtual bool DoApplyChanges(TColumnEngineForLogs& self, TApplyChangesContext& context) override; 
virtual void DoOnFinish(NColumnShard::TColumnShard& self, TChangesFinishContext& context) override; - virtual void DoWriteIndex(NColumnShard::TColumnShard& self, TWriteIndexContext& context) override; virtual TConclusionStatus DoConstructBlobs(TConstructionContext& context) noexcept override; virtual NColumnShard::ECumulativeCounters GetCounterIndex(const bool isSuccess) const override; virtual ui64 DoCalcMemoryForUsage() const override { @@ -28,11 +28,16 @@ class TInsertColumnEngineChanges: public TChangesWithAppend { } return result; } + + virtual std::shared_ptr DoBuildDataLockImpl() const override { + return nullptr; + } + public: - THashMap> PathToGranule; // pathId -> positions (sorted by pk) + THashMap> PathToGranule; // pathId -> positions (sorted by pk) public: - TInsertColumnEngineChanges(std::vector&& dataToIndex, const TSplitSettings& splitSettings, const TSaverContext& saverContext) - : TBase(splitSettings, saverContext, StaticTypeName()) + TInsertColumnEngineChanges(std::vector&& dataToIndex, const TSaverContext& saverContext) + : TBase(saverContext, NBlobOperations::EConsumer::INDEXATION) , DataToIndex(std::move(dataToIndex)) { } @@ -41,10 +46,6 @@ class TInsertColumnEngineChanges: public TChangesWithAppend { return DataToIndex; } - virtual THashSet GetTouchedPortions() const override { - return TBase::GetTouchedPortions(); - } - static TString StaticTypeName() { return "CS::INDEXATION"; } diff --git a/ydb/core/tx/columnshard/engines/changes/ttl.cpp b/ydb/core/tx/columnshard/engines/changes/ttl.cpp index 9a0ad102e54b..1caea149059e 100644 --- a/ydb/core/tx/columnshard/engines/changes/ttl.cpp +++ b/ydb/core/tx/columnshard/engines/changes/ttl.cpp @@ -9,63 +9,65 @@ namespace NKikimr::NOlap { void TTTLColumnEngineChanges::DoDebugString(TStringOutput& out) const { TBase::DoDebugString(out); - out << "eviction=" << PortionsToEvict.size() << ";"; + out << "eviction=" << PortionsToEvict.size() << ";address=" << RWAddress.DebugString() << ";"; } void 
TTTLColumnEngineChanges::DoStart(NColumnShard::TColumnShard& self) { Y_ABORT_UNLESS(PortionsToEvict.size() || PortionsToRemove.size()); + THashMap> blobRanges; + auto& engine = self.MutableIndexAs(); + auto& index = engine.GetVersionedIndex(); for (const auto& p : PortionsToEvict) { Y_ABORT_UNLESS(!p.GetPortionInfo().Empty()); - - auto agent = BlobsAction.GetReading(p.GetPortionInfo()); - for (const auto& rec : p.GetPortionInfo().Records) { - agent->AddRange(rec.BlobRange); + p.GetPortionInfo().FillBlobRangesByStorage(blobRanges, index); + } + for (auto&& i : blobRanges) { + auto action = BlobsAction.GetReading(i.first); + for (auto&& b : i.second) { + action->AddRange(b); } } - self.BackgroundController.StartTtl(*this); + engine.GetActualizationController()->StartActualization(RWAddress); } void TTTLColumnEngineChanges::DoOnFinish(NColumnShard::TColumnShard& self, TChangesFinishContext& /*context*/) { - self.BackgroundController.FinishTtl(); + auto& engine = self.MutableIndexAs(); + engine.GetActualizationController()->FinishActualization(RWAddress); + if (IsAborted()) { + THashMap> restoreIndexAddresses; + for (auto&& i : PortionsToEvict) { + AFL_VERIFY(restoreIndexAddresses[i.GetPortionInfo().GetPathId()].emplace(i.GetPortionInfo().GetPortionId()).second); + } + for (auto&& i : PortionsToRemove) { + AFL_VERIFY(restoreIndexAddresses[i.first.GetPathId()].emplace(i.first.GetPortionId()).second); + } + engine.ReturnToIndexes(restoreIndexAddresses); + } } -std::optional TTTLColumnEngineChanges::UpdateEvictedPortion(TPortionForEviction& info, THashMap& srcBlobs, - TConstructionContext& context) const { +std::optional TTTLColumnEngineChanges::UpdateEvictedPortion(TPortionForEviction& info, NBlobOperations::NRead::TCompositeReadBlobs& srcBlobs, + TConstructionContext& context) const +{ const TPortionInfo& portionInfo = info.GetPortionInfo(); auto& evictFeatures = info.GetFeatures(); - Y_ABORT_UNLESS(portionInfo.GetMeta().GetTierName() != evictFeatures.TargetTierName); - 
- auto* tiering = Tiering.FindPtr(evictFeatures.PathId); - Y_ABORT_UNLESS(tiering); - auto serializer = tiering->GetSerializer(evictFeatures.TargetTierName); - if (!serializer) { - // Nothing to recompress. We have no other kinds of evictions yet. - evictFeatures.DataChanges = false; - auto result = TPortionInfoWithBlobs::RestorePortion(portionInfo, srcBlobs); - result.GetPortionInfo().InitOperator(evictFeatures.StorageOperator, true); - result.GetPortionInfo().MutableMeta().SetTierName(evictFeatures.TargetTierName); - return result; - } + auto blobSchema = portionInfo.GetSchema(context.SchemaVersions); + Y_ABORT_UNLESS(portionInfo.GetMeta().GetTierName() != evictFeatures.GetTargetTierName() || blobSchema->GetVersion() < evictFeatures.GetTargetScheme()->GetVersion()); - auto blobSchema = context.SchemaVersions.GetSchema(portionInfo.GetMinSnapshot()); - auto resultSchema = context.SchemaVersions.GetLastSchema(); - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("portion_for_eviction", portionInfo.DebugString()); + auto portionWithBlobs = TPortionInfoWithBlobs::RestorePortion(portionInfo, srcBlobs, blobSchema->GetIndexInfo(), SaverContext.GetStoragesManager()); + TPortionInfoWithBlobs result = TPortionInfoWithBlobs::SyncPortion( + std::move(portionWithBlobs), blobSchema, evictFeatures.GetTargetScheme(), evictFeatures.GetTargetTierName(), SaverContext.GetStoragesManager(), context.Counters.SplitterCounters); - TSaverContext saverContext(evictFeatures.StorageOperator, SaverContext.GetStoragesManager()); - saverContext.SetTierName(evictFeatures.TargetTierName).SetExternalSerializer(*serializer); - auto withBlobs = TPortionInfoWithBlobs::RestorePortion(portionInfo, srcBlobs); - withBlobs.GetPortionInfo().InitOperator(evictFeatures.StorageOperator, true); - withBlobs.GetPortionInfo().MutableMeta().SetTierName(evictFeatures.TargetTierName); - return withBlobs.ChangeSaver(resultSchema, saverContext); + 
result.GetPortionInfo().MutableMeta().SetTierName(evictFeatures.GetTargetTierName()); + return std::move(result); } NKikimr::TConclusionStatus TTTLColumnEngineChanges::DoConstructBlobs(TConstructionContext& context) noexcept { - Y_ABORT_UNLESS(!Blobs.empty()); + Y_ABORT_UNLESS(!Blobs.IsEmpty()); Y_ABORT_UNLESS(!PortionsToEvict.empty()); for (auto&& info : PortionsToEvict) { if (auto pwb = UpdateEvictedPortion(info, Blobs, context)) { - info.MutablePortionInfo().SetRemoveSnapshot(info.MutablePortionInfo().GetMinSnapshot()); + info.MutablePortionInfo().SetRemoveSnapshot(context.LastCommittedTx); AFL_VERIFY(PortionsToRemove.emplace(info.GetPortionInfo().GetAddress(), info.GetPortionInfo()).second); AppendedPortions.emplace_back(std::move(*pwb)); } diff --git a/ydb/core/tx/columnshard/engines/changes/ttl.h b/ydb/core/tx/columnshard/engines/changes/ttl.h index 5018d860bbb4..c9cb1a989d93 100644 --- a/ydb/core/tx/columnshard/engines/changes/ttl.h +++ b/ydb/core/tx/columnshard/engines/changes/ttl.h @@ -1,14 +1,15 @@ #pragma once #include "compaction.h" + +#include + #include namespace NKikimr::NOlap { class TTTLColumnEngineChanges: public TChangesWithAppend { private: - using TPathIdBlobs = THashMap>; using TBase = TChangesWithAppend; - THashMap ExportTierBlobs; class TPortionForEviction { private: @@ -39,11 +40,11 @@ class TTTLColumnEngineChanges: public TChangesWithAppend { } }; - std::optional UpdateEvictedPortion(TPortionForEviction& info, THashMap& srcBlobs, + std::optional UpdateEvictedPortion(TPortionForEviction& info, NBlobOperations::NRead::TCompositeReadBlobs& srcBlobs, TConstructionContext& context) const; - std::vector PortionsToEvict; // {portion, TPortionEvictionFeatures} - + std::vector PortionsToEvict; + const NActualizer::TRWAddress RWAddress; protected: virtual void DoStart(NColumnShard::TColumnShard& self) override; virtual void DoOnFinish(NColumnShard::TColumnShard& self, TChangesFinishContext& context) override; @@ -58,6 +59,12 @@ class 
TTTLColumnEngineChanges: public TChangesWithAppend { } return result; } + virtual std::shared_ptr DoBuildDataLockImpl() const override { + const auto pred = [](const TPortionForEviction& p) { + return p.GetPortionInfo().GetAddress(); + }; + return std::make_shared(TypeString() + "::" + RWAddress.DebugString() + "::" + GetTaskIdentifier(), PortionsToEvict, pred); + } public: class TMemoryPredictorSimplePolicy: public IMemoryPredictor { private: @@ -65,14 +72,18 @@ class TTTLColumnEngineChanges: public TChangesWithAppend { ui64 MaxRawMemory = 0; public: virtual ui64 AddPortion(const TPortionInfo& portionInfo) override { - if (MaxRawMemory < portionInfo.GetRawBytes()) { - MaxRawMemory = portionInfo.GetRawBytes(); + if (MaxRawMemory < portionInfo.GetTotalRawBytes()) { + MaxRawMemory = portionInfo.GetTotalRawBytes(); } - SumBlobsMemory += portionInfo.GetBlobBytes(); + SumBlobsMemory += portionInfo.GetTotalBlobBytes(); return SumBlobsMemory + MaxRawMemory; } }; + const NActualizer::TRWAddress& GetRWAddress() const { + return RWAddress; + } + static std::shared_ptr BuildMemoryPredictor() { return std::make_shared(); } @@ -80,16 +91,6 @@ class TTTLColumnEngineChanges: public TChangesWithAppend { virtual bool NeedConstruction() const override { return PortionsToEvict.size(); } - virtual THashSet GetTouchedPortions() const override { - THashSet result = TBase::GetTouchedPortions(); - for (auto&& info : PortionsToEvict) { - result.emplace(info.GetPortionInfo().GetAddress()); - } - return result; - } - - THashMap Tiering; - ui32 GetPortionsToEvictCount() const { return PortionsToEvict.size(); } @@ -108,8 +109,10 @@ class TTTLColumnEngineChanges: public TChangesWithAppend { return StaticTypeName(); } - TTTLColumnEngineChanges(const TSplitSettings& splitSettings, const TSaverContext& saverContext) - : TBase(splitSettings, saverContext, StaticTypeName()) { + TTTLColumnEngineChanges(const NActualizer::TRWAddress& address, const TSaverContext& saverContext) + : TBase(saverContext, 
NBlobOperations::EConsumer::TTL) + , RWAddress(address) + { } diff --git a/ydb/core/tx/columnshard/engines/changes/with_appended.cpp b/ydb/core/tx/columnshard/engines/changes/with_appended.cpp index 8b47d79ffb80..a76bbb34f12c 100644 --- a/ydb/core/tx/columnshard/engines/changes/with_appended.cpp +++ b/ydb/core/tx/columnshard/engines/changes/with_appended.cpp @@ -2,77 +2,87 @@ #include #include #include -#include +#include +#include namespace NKikimr::NOlap { -void TChangesWithAppend::DoWriteIndex(NColumnShard::TColumnShard& self, TWriteIndexContext& /*context*/) { - for (auto& portionInfo : AppendedPortions) { - switch (portionInfo.GetPortionInfo().GetMeta().Produced) { - case NOlap::TPortionMeta::EProduced::UNSPECIFIED: - Y_ABORT_UNLESS(false); // unexpected - case NOlap::TPortionMeta::EProduced::INSERTED: - self.IncCounter(NColumnShard::COUNTER_INDEXING_PORTIONS_WRITTEN); - break; - case NOlap::TPortionMeta::EProduced::COMPACTED: - self.IncCounter(NColumnShard::COUNTER_COMPACTION_PORTIONS_WRITTEN); - break; - case NOlap::TPortionMeta::EProduced::SPLIT_COMPACTED: - self.IncCounter(NColumnShard::COUNTER_SPLIT_COMPACTION_PORTIONS_WRITTEN); - break; - case NOlap::TPortionMeta::EProduced::EVICTED: - Y_ABORT("Unexpected evicted case"); - break; - case NOlap::TPortionMeta::EProduced::INACTIVE: - Y_ABORT("Unexpected inactive case"); - break; +void TChangesWithAppend::DoWriteIndexOnExecute(NColumnShard::TColumnShard* self, TWriteIndexContext& context) { + THashSet usedPortionIds; + for (auto& [_, portionInfo] : PortionsToRemove) { + Y_ABORT_UNLESS(!portionInfo.Empty()); + Y_ABORT_UNLESS(portionInfo.HasRemoveSnapshot()); + AFL_VERIFY(usedPortionIds.emplace(portionInfo.GetPortionId()).second)("portion_info", portionInfo.DebugString(true)); + portionInfo.SaveToDatabase(context.DBWrapper); + } + const auto predRemoveDroppedTable = [self](const TPortionInfoWithBlobs& item) { + auto& portionInfo = item.GetPortionInfo(); + if (!!self && 
(!self->TablesManager.HasTable(portionInfo.GetPathId()) || self->TablesManager.GetTable(portionInfo.GetPathId()).IsDropped())) { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "skip_inserted_data")("reason", "table_removed")("path_id", portionInfo.GetPathId()); + return true; + } else { + return false; } + }; + AppendedPortions.erase(std::remove_if(AppendedPortions.begin(), AppendedPortions.end(), predRemoveDroppedTable), AppendedPortions.end()); + for (auto& portionInfoWithBlobs : AppendedPortions) { + auto& portionInfo = portionInfoWithBlobs.GetPortionInfo(); + Y_ABORT_UNLESS(!portionInfo.Empty()); + AFL_VERIFY(usedPortionIds.emplace(portionInfo.GetPortionId()).second)("portion_info", portionInfo.DebugString(true)); + portionInfo.SaveToDatabase(context.DBWrapper); } - self.IncCounter(NColumnShard::COUNTER_PORTIONS_DEACTIVATED, PortionsToRemove.size()); +} - THashSet blobsDeactivated; - for (auto& [_, portionInfo] : PortionsToRemove) { - for (auto& rec : portionInfo.Records) { - blobsDeactivated.insert(rec.BlobRange.BlobId); +void TChangesWithAppend::DoWriteIndexOnComplete(NColumnShard::TColumnShard* self, TWriteIndexCompleteContext& context) { + if (self) { + for (auto& portionInfo : AppendedPortions) { + switch (portionInfo.GetPortionInfo().GetMeta().Produced) { + case NOlap::TPortionMeta::EProduced::UNSPECIFIED: + Y_ABORT_UNLESS(false); // unexpected + case NOlap::TPortionMeta::EProduced::INSERTED: + self->IncCounter(NColumnShard::COUNTER_INDEXING_PORTIONS_WRITTEN); + break; + case NOlap::TPortionMeta::EProduced::COMPACTED: + self->IncCounter(NColumnShard::COUNTER_COMPACTION_PORTIONS_WRITTEN); + break; + case NOlap::TPortionMeta::EProduced::SPLIT_COMPACTED: + self->IncCounter(NColumnShard::COUNTER_SPLIT_COMPACTION_PORTIONS_WRITTEN); + break; + case NOlap::TPortionMeta::EProduced::EVICTED: + Y_ABORT("Unexpected evicted case"); + break; + case NOlap::TPortionMeta::EProduced::INACTIVE: + Y_ABORT("Unexpected inactive case"); + break; + } } - 
self.IncCounter(NColumnShard::COUNTER_RAW_BYTES_DEACTIVATED, portionInfo.RawBytesSum()); - } + self->IncCounter(NColumnShard::COUNTER_PORTIONS_DEACTIVATED, PortionsToRemove.size()); - self.IncCounter(NColumnShard::COUNTER_BLOBS_DEACTIVATED, blobsDeactivated.size()); - for (auto& blobId : blobsDeactivated) { - self.IncCounter(NColumnShard::COUNTER_BYTES_DEACTIVATED, blobId.BlobSize()); - } -} + THashSet blobsDeactivated; + for (auto& [_, portionInfo] : PortionsToRemove) { + for (auto& rec : portionInfo.Records) { + blobsDeactivated.emplace(portionInfo.GetBlobId(rec.BlobRange.GetBlobIdxVerified())); + } + self->IncCounter(NColumnShard::COUNTER_RAW_BYTES_DEACTIVATED, portionInfo.GetTotalRawBytes()); + } -bool TChangesWithAppend::DoApplyChanges(TColumnEngineForLogs& self, TApplyChangesContext& context) { - // Save new portions (their column records) + self->IncCounter(NColumnShard::COUNTER_BLOBS_DEACTIVATED, blobsDeactivated.size()); + for (auto& blobId : blobsDeactivated) { + self->IncCounter(NColumnShard::COUNTER_BYTES_DEACTIVATED, blobId.BlobSize()); + } + } { - auto g = self.GranulesStorage->StartPackModification(); - THashSet usedPortionIds; + auto g = context.EngineLogs.GranulesStorage->GetStats()->StartPackModification(); for (auto& [_, portionInfo] : PortionsToRemove) { - Y_ABORT_UNLESS(!portionInfo.Empty()); - Y_ABORT_UNLESS(portionInfo.HasRemoveSnapshot()); - - const TPortionInfo& oldInfo = self.GetGranuleVerified(portionInfo.GetPathId()).GetPortionVerified(portionInfo.GetPortion()); - AFL_VERIFY(usedPortionIds.emplace(portionInfo.GetPortionId()).second)("portion_info", portionInfo.DebugString(true)); - self.UpsertPortion(portionInfo, &oldInfo); - - portionInfo.SaveToDatabase(context.DB); + context.EngineLogs.CleanupPortions[portionInfo.GetRemoveSnapshotVerified()].emplace_back(portionInfo); + const TPortionInfo& oldInfo = context.EngineLogs.GetGranuleVerified(portionInfo.GetPathId()).GetPortionVerified(portionInfo.GetPortion()); + 
context.EngineLogs.UpsertPortion(portionInfo, &oldInfo); } for (auto& portionInfoWithBlobs : AppendedPortions) { auto& portionInfo = portionInfoWithBlobs.GetPortionInfo(); - Y_ABORT_UNLESS(!portionInfo.Empty()); - AFL_VERIFY(usedPortionIds.emplace(portionInfo.GetPortionId()).second)("portion_info", portionInfo.DebugString(true)); - self.UpsertPortion(portionInfo); - portionInfo.SaveToDatabase(context.DB); + context.EngineLogs.UpsertPortion(portionInfo); } } - - for (auto& [_, portionInfo] : PortionsToRemove) { - self.CleanupPortions[portionInfo.GetRemoveSnapshot()].emplace_back(portionInfo); - } - - return true; } void TChangesWithAppend::DoCompile(TFinalizationContext& context) { @@ -88,7 +98,7 @@ void TChangesWithAppend::DoCompile(TFinalizationContext& context) { } std::vector TChangesWithAppend::MakeAppendedPortions(const std::shared_ptr batch, - const ui64 pathId, const TSnapshot& snapshot, const TGranuleMeta* granuleMeta, TConstructionContext& context) const { + const ui64 pathId, const TSnapshot& snapshot, const TGranuleMeta* granuleMeta, TConstructionContext& context, const std::optional& overrideSaver) const { Y_ABORT_UNLESS(batch->num_rows()); auto resultSchema = context.SchemaVersions.GetSchema(snapshot); @@ -97,29 +107,33 @@ std::vector TChangesWithAppend::MakeAppendedPortions(cons if (granuleMeta) { stats = granuleMeta->BuildSerializationStats(resultSchema); } - auto schema = std::make_shared(resultSchema, SaverContext, stats); + auto schema = std::make_shared(resultSchema, stats); + if (overrideSaver) { + schema->SetOverrideSerializer(*overrideSaver); + } std::vector out; { - std::vector pages = TRBSplitLimiter::BuildSimpleSlices(batch, SplitSettings, context.Counters.SplitterCounters, schema); + std::vector pages = TBatchSerializedSlice::BuildSimpleSlices(batch, NSplitter::TSplitSettings(), context.Counters.SplitterCounters, schema); std::vector generalPages; for (auto&& i : pages) { - std::map>> portionColumns = i.GetPortionChunks(); + auto 
portionColumns = i.GetPortionChunksToHash(); resultSchema->GetIndexInfo().AppendIndexes(portionColumns); - generalPages.emplace_back(portionColumns, schema, context.Counters.SplitterCounters, SplitSettings); + generalPages.emplace_back(portionColumns, schema, context.Counters.SplitterCounters); } - TSimilarSlicer slicer(SplitSettings.GetExpectedPortionSize()); + const NSplitter::TEntityGroups groups = resultSchema->GetIndexInfo().GetEntityGroupsByStorageId(IStoragesManager::DefaultStorageId, *SaverContext.GetStoragesManager()); + TSimilarPacker slicer(NSplitter::TSplitSettings().GetExpectedPortionSize()); auto packs = slicer.Split(generalPages); ui32 recordIdx = 0; for (auto&& i : packs) { TGeneralSerializedSlice slice(std::move(i)); auto b = batch->Slice(recordIdx, slice.GetRecordsCount()); - std::vector>> chunksByBlobs = slice.GroupChunksByBlobs(); - out.emplace_back(TPortionInfoWithBlobs::BuildByBlobs(chunksByBlobs, nullptr, pathId, snapshot, SaverContext.GetStorageOperator())); + out.emplace_back(TPortionInfoWithBlobs::BuildByBlobs(slice.GroupChunksByBlobs(groups), nullptr, pathId, resultSchema->GetVersion(), snapshot, SaverContext.GetStoragesManager())); + out.back().FillStatistics(resultSchema->GetIndexInfo()); NArrow::TFirstLastSpecialKeys primaryKeys(slice.GetFirstLastPKBatch(resultSchema->GetIndexInfo().GetReplaceKey())); NArrow::TMinMaxSpecialKeys snapshotKeys(b, TIndexInfo::ArrowSchemaSnapshot()); - out.back().GetPortionInfo().AddMetadata(*resultSchema, primaryKeys, snapshotKeys, SaverContext.GetTierName()); + out.back().GetPortionInfo().AddMetadata(*resultSchema, primaryKeys, snapshotKeys, IStoragesManager::DefaultStorageId); recordIdx += slice.GetRecordsCount(); } } diff --git a/ydb/core/tx/columnshard/engines/changes/with_appended.h b/ydb/core/tx/columnshard/engines/changes/with_appended.h index 779a3ab8a14f..ebac536d0110 100644 --- a/ydb/core/tx/columnshard/engines/changes/with_appended.h +++ b/ydb/core/tx/columnshard/engines/changes/with_appended.h 
@@ -11,43 +11,39 @@ class TChangesWithAppend: public TColumnEngineChanges { using TBase = TColumnEngineChanges; protected: - TSplitSettings SplitSettings; TSaverContext SaverContext; virtual void DoCompile(TFinalizationContext& context) override; - virtual bool DoApplyChanges(TColumnEngineForLogs& self, TApplyChangesContext& context) override; - virtual void DoWriteIndex(NColumnShard::TColumnShard& self, TWriteIndexContext& context) override; - virtual void DoWriteIndexComplete(NColumnShard::TColumnShard& /*self*/, TWriteIndexCompleteContext& /*context*/) override { - - } + virtual void DoWriteIndexOnExecute(NColumnShard::TColumnShard* self, TWriteIndexContext& context) override; + virtual void DoWriteIndexOnComplete(NColumnShard::TColumnShard* self, TWriteIndexCompleteContext& context) override; virtual void DoStart(NColumnShard::TColumnShard& self) override; std::vector MakeAppendedPortions(const std::shared_ptr batch, const ui64 granule, - const TSnapshot& snapshot, const TGranuleMeta* granuleMeta, TConstructionContext& context) const; + const TSnapshot& snapshot, const TGranuleMeta* granuleMeta, TConstructionContext& context, const std::optional& overrideSaver) const; virtual void DoDebugString(TStringOutput& out) const override { out << "remove=" << PortionsToRemove.size() << ";append=" << AppendedPortions.size() << ";"; } -public: - const TSplitSettings& GetSplitSettings() const { - return SplitSettings; + virtual std::shared_ptr DoBuildDataLockImpl() const = 0; + + virtual std::shared_ptr DoBuildDataLock() const override final { + auto actLock = DoBuildDataLockImpl(); + if (actLock) { + auto selfLock = std::make_shared(TypeString() + "::" + GetTaskIdentifier() + "::REMOVE", PortionsToRemove); + return std::make_shared(TypeString() + "::" + GetTaskIdentifier(), std::vector>({actLock, selfLock})); + } else { + auto selfLock = std::make_shared(TypeString() + "::" + GetTaskIdentifier(), PortionsToRemove); + return selfLock; + } } - TChangesWithAppend(const 
TSplitSettings& splitSettings, const TSaverContext& saverContext, const TString& consumerId) +public: + TChangesWithAppend(const TSaverContext& saverContext, const NBlobOperations::EConsumer consumerId) : TBase(saverContext.GetStoragesManager(), consumerId) - , SplitSettings(splitSettings) , SaverContext(saverContext) { } - virtual THashSet GetTouchedPortions() const override { - THashSet result; - for (auto&& i : PortionsToRemove) { - result.emplace(i.first); - } - return result; - } - THashMap PortionsToRemove; std::vector AppendedPortions; virtual ui32 GetWritePortionsCount() const override { diff --git a/ydb/core/tx/columnshard/engines/changes/ya.make b/ydb/core/tx/columnshard/engines/changes/ya.make index faf74a7c05bb..5a266e18bb0a 100644 --- a/ydb/core/tx/columnshard/engines/changes/ya.make +++ b/ydb/core/tx/columnshard/engines/changes/ya.make @@ -4,7 +4,8 @@ SRCS( compaction.cpp ttl.cpp indexation.cpp - cleanup.cpp + cleanup_portions.cpp + cleanup_tables.cpp with_appended.cpp general_compaction.cpp ) @@ -16,6 +17,7 @@ PEERDIR( ydb/core/tx/columnshard/engines/changes/abstract ydb/core/tx/columnshard/engines/changes/compaction ydb/core/tx/columnshard/engines/changes/counters + ydb/core/tx/columnshard/engines/changes/actualization ydb/core/tx/columnshard/splitter ydb/core/tablet_flat ydb/core/tx/tiering diff --git a/ydb/core/tx/columnshard/engines/column_engine.cpp b/ydb/core/tx/columnshard/engines/column_engine.cpp index efe84977f65d..d6f46742093c 100644 --- a/ydb/core/tx/columnshard/engines/column_engine.cpp +++ b/ydb/core/tx/columnshard/engines/column_engine.cpp @@ -1,12 +1,24 @@ #include "column_engine.h" -#include "changes/abstract/abstract.h" -#include +#include +#include namespace NKikimr::NOlap { +const std::shared_ptr& IColumnEngine::GetReplaceKey() const { + return GetVersionedIndex().GetLastSchema()->GetIndexInfo().GetReplaceKey(); +} + +ui64 IColumnEngine::GetMetadataLimit() { + if (!HasAppData()) { + 
AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("total", NSystemInfo::TotalMemorySize()); + return NSystemInfo::TotalMemorySize() * 0.3; + } else if (AppDataVerified().ColumnShardConfig.GetIndexMetadataMemoryLimit().HasAbsoluteValue()) { + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("value", AppDataVerified().ColumnShardConfig.GetIndexMetadataMemoryLimit().GetAbsoluteValue()); + return AppDataVerified().ColumnShardConfig.GetIndexMetadataMemoryLimit().GetAbsoluteValue(); + } else { + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("total", NSystemInfo::TotalMemorySize())("kff", AppDataVerified().ColumnShardConfig.GetIndexMetadataMemoryLimit().GetTotalRatio()); + return NSystemInfo::TotalMemorySize() * AppDataVerified().ColumnShardConfig.GetIndexMetadataMemoryLimit().GetTotalRatio(); + } } -template <> -void Out(IOutputStream& out, TTypeTraits::TFuncParam changes) { - out << changes.DebugString(); } diff --git a/ydb/core/tx/columnshard/engines/column_engine.h b/ydb/core/tx/columnshard/engines/column_engine.h index 618b70a27c0c..aba511eec9b1 100644 --- a/ydb/core/tx/columnshard/engines/column_engine.h +++ b/ydb/core/tx/columnshard/engines/column_engine.h @@ -1,10 +1,11 @@ #pragma once #include "db_wrapper.h" -#include "portions/portion_info.h" #include "scheme/snapshot_scheme.h" #include "predicate/filter.h" #include "changes/abstract/settings.h" #include "changes/abstract/compaction_info.h" +#include "scheme/versions/versioned_index.h" + #include namespace NKikimr::NColumnShard { @@ -17,7 +18,12 @@ class TInsertColumnEngineChanges; class TCompactColumnEngineChanges; class TColumnEngineChanges; class TTTLColumnEngineChanges; -class TCleanupColumnEngineChanges; +class TCleanupPortionsColumnEngineChanges; +class TCleanupTablesColumnEngineChanges; +class TPortionInfo; +namespace NDataLocks { +class TManager; +} struct TSelectInfo { struct TStats { @@ -60,12 +66,9 @@ struct TSelectInfo { out.Records += portionInfo->NumChunks(); out.Rows += portionInfo->NumRows(); for (auto& rec : 
portionInfo->Records) { - uniqBlob.insert(rec.BlobRange.BlobId); + out.Bytes += rec.BlobRange.Size; } - } - out.Blobs += uniqBlob.size(); - for (auto blobId : uniqBlob) { - out.Bytes += blobId.BlobSize(); + out.Blobs += portionInfo->GetBlobIdsCount(); } return out; } @@ -156,7 +159,6 @@ class TColumnEngineStats { i64 Tables{}; i64 ColumnRecords{}; - i64 ColumnMetadataBytes{}; THashMap StatsByType; std::vector GetKinds() const { @@ -286,82 +288,18 @@ class TColumnEngineStats { } }; -class TVersionedIndex { - std::map Snapshots; - std::shared_ptr PrimaryKey; - std::map SnapshotByVersion; - ui64 LastSchemaVersion = 0; -public: - TString DebugString() const { - TStringBuilder sb; - for (auto&& i : Snapshots) { - sb << i.first << ":" << i.second->DebugString() << ";"; - } - return sb; - } - - ISnapshotSchema::TPtr GetSchema(const ui64 version) const { - auto it = SnapshotByVersion.find(version); - return it == SnapshotByVersion.end() ? nullptr : it->second; - } - - ISnapshotSchema::TPtr GetSchemaVerified(const ui64 version) const { - auto it = SnapshotByVersion.find(version); - Y_ABORT_UNLESS(it != SnapshotByVersion.end(), "no schema for version %lu", version); - return it->second; - } - - ISnapshotSchema::TPtr GetSchema(const TSnapshot& version) const { - for (auto it = Snapshots.rbegin(); it != Snapshots.rend(); ++it) { - if (it->first <= version) { - return it->second; - } - } - Y_ABORT_UNLESS(!Snapshots.empty()); - Y_ABORT_UNLESS(version.IsZero()); - return Snapshots.begin()->second; // For old compaction logic compatibility - } - - ISnapshotSchema::TPtr GetLastSchema() const { - Y_ABORT_UNLESS(!Snapshots.empty()); - return Snapshots.rbegin()->second; - } - - bool IsEmpty() const { - return Snapshots.empty(); - } - - const std::shared_ptr& GetPrimaryKey() const noexcept { - return PrimaryKey; - } - - void AddIndex(const TSnapshot& snapshot, TIndexInfo&& indexInfo) { - if (Snapshots.empty()) { - PrimaryKey = indexInfo.GetPrimaryKey(); - } else { - 
Y_ABORT_UNLESS(PrimaryKey->Equals(indexInfo.GetPrimaryKey())); - } - - auto newVersion = indexInfo.GetVersion(); - auto itVersion = SnapshotByVersion.emplace(newVersion, std::make_shared(std::move(indexInfo), snapshot)); - if (!itVersion.second) { - AFL_INFO(NKikimrServices::TX_COLUMNSHARD)("message", "Skip registered version")("version", LastSchemaVersion); - } - auto itSnap = Snapshots.emplace(snapshot, itVersion.first->second); - Y_ABORT_UNLESS(itSnap.second); - LastSchemaVersion = std::max(newVersion, LastSchemaVersion); - } -}; - - class IColumnEngine { protected: virtual void DoRegisterTable(const ui64 pathId) = 0; public: + + static ui64 GetMetadataLimit(); + virtual ~IColumnEngine() = default; virtual const TVersionedIndex& GetVersionedIndex() const = 0; - virtual const std::shared_ptr& GetReplaceKey() const { return GetVersionedIndex().GetLastSchema()->GetIndexInfo().GetReplaceKey(); } + virtual std::shared_ptr CopyVersionedIndexPtr() const = 0; + virtual const std::shared_ptr& GetReplaceKey() const; virtual bool HasDataInPathId(const ui64 pathId) const = 0; virtual bool Load(IDbWrapper& db) = 0; @@ -369,21 +307,24 @@ class IColumnEngine { AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "RegisterTable")("path_id", pathId); return DoRegisterTable(pathId); } + virtual bool IsOverloadedByMetadata(const ui64 limit) const = 0; virtual std::shared_ptr Select(ui64 pathId, TSnapshot snapshot, const TPKRangesFilter& pkRangesFilter) const = 0; virtual std::shared_ptr StartInsert(std::vector&& dataToIndex) noexcept = 0; - virtual std::shared_ptr StartCompaction(const TCompactionLimits& limits, const THashSet& busyPortions) noexcept = 0; - virtual std::shared_ptr StartCleanup(const TSnapshot& snapshot, THashSet& pathsToDrop, - ui32 maxRecords) noexcept = 0; - virtual std::shared_ptr StartTtl(const THashMap& pathEviction, - const THashSet& busyPortions, const ui64 memoryUsageLimit) noexcept = 0; + virtual std::shared_ptr StartCompaction(const std::shared_ptr& 
dataLocksManager) noexcept = 0; + virtual std::shared_ptr StartCleanupPortions(const TSnapshot& snapshot, const THashSet& pathsToDrop, + const std::shared_ptr& dataLocksManager) noexcept = 0; + virtual std::shared_ptr StartCleanupTables(THashSet& pathsToDrop) noexcept = 0; + virtual std::vector> StartTtl(const THashMap& pathEviction, + const std::shared_ptr& dataLocksManager, const ui64 memoryUsageLimit) noexcept = 0; virtual bool ApplyChanges(IDbWrapper& db, std::shared_ptr changes, const TSnapshot& snapshot) noexcept = 0; virtual void RegisterSchemaVersion(const TSnapshot& snapshot, TIndexInfo&& info) = 0; + virtual void RegisterSchemaVersion(const TSnapshot& snapshot, const NKikimrSchemeOp::TColumnTableSchema& schema) = 0; virtual const TMap>& GetStats() const = 0; virtual const TColumnEngineStats& GetTotalStats() = 0; virtual ui64 MemoryUsage() const { return 0; } virtual TSnapshot LastUpdate() const { return TSnapshot::Zero(); } - virtual void OnTieringModified(std::shared_ptr manager, const NColumnShard::TTtl& ttl) = 0; + virtual void OnTieringModified(const std::shared_ptr& manager, const NColumnShard::TTtl& ttl, const std::optional pathId) = 0; }; } diff --git a/ydb/core/tx/columnshard/engines/column_engine_logs.cpp b/ydb/core/tx/columnshard/engines/column_engine_logs.cpp index 17833bc5a6e6..5b3710f18c65 100644 --- a/ydb/core/tx/columnshard/engines/column_engine_logs.cpp +++ b/ydb/core/tx/columnshard/engines/column_engine_logs.cpp @@ -1,18 +1,21 @@ #include "column_engine_logs.h" #include "filter.h" +#include "changes/actualization/construction/context.h" +#include "changes/indexation.h" +#include "changes/general_compaction.h" +#include "changes/cleanup_portions.h" +#include "changes/cleanup_tables.h" +#include "changes/ttl.h" + #include #include -#include -#include -#include #include #include +#include +#include + #include -#include "changes/indexation.h" -#include "changes/general_compaction.h" -#include "changes/cleanup.h" -#include "changes/ttl.h" 
#include #include @@ -21,22 +24,27 @@ namespace NKikimr::NOlap { -TColumnEngineForLogs::TColumnEngineForLogs(ui64 tabletId, const TCompactionLimits& limits, const std::shared_ptr& storagesManager) - : GranulesStorage(std::make_shared(SignalCounters, limits, storagesManager)) +TColumnEngineForLogs::TColumnEngineForLogs(ui64 tabletId, const std::shared_ptr& storagesManager, + const TSnapshot& snapshot, const NKikimrSchemeOp::TColumnTableSchema& schema) + : GranulesStorage(std::make_shared(SignalCounters, storagesManager)) , StoragesManager(storagesManager) , TabletId(tabletId) , LastPortion(0) , LastGranule(0) { + ActualizationController = std::make_shared(); + RegisterSchemaVersion(snapshot, schema); } -ui64 TColumnEngineForLogs::MemoryUsage() const { - auto numPortions = Counters.GetPortionsCount(); - - return Counters.Tables * (sizeof(TGranuleMeta) + sizeof(ui64)) + - numPortions * (sizeof(TPortionInfo) + sizeof(ui64)) + - Counters.ColumnRecords * sizeof(TColumnRecord) + - Counters.ColumnMetadataBytes; +TColumnEngineForLogs::TColumnEngineForLogs(ui64 tabletId, const std::shared_ptr& storagesManager, + const TSnapshot& snapshot, TIndexInfo&& schema) + : GranulesStorage(std::make_shared(SignalCounters, storagesManager)) + , StoragesManager(storagesManager) + , TabletId(tabletId) + , LastPortion(0) + , LastGranule(0) { + ActualizationController = std::make_shared(); + RegisterSchemaVersion(snapshot, std::move(schema)); } const TMap>& TColumnEngineForLogs::GetStats() const { @@ -44,8 +52,7 @@ const TMap>& TColumnEngineForLogs::Get } const TColumnEngineStats& TColumnEngineForLogs::GetTotalStats() { - Counters.Tables = Tables.size(); - + Counters.Tables = GranulesStorage->GetTables().size(); return Counters; } @@ -63,22 +70,17 @@ void TColumnEngineForLogs::UpdatePortionStats(const TPortionInfo& portionInfo, E UpdatePortionStats(*PathStats[pathId], portionInfo, updateType, exPortionInfo); } -TColumnEngineStats::TPortionsStats DeltaStats(const TPortionInfo& portionInfo, 
ui64& metadataBytes) { +TColumnEngineStats::TPortionsStats DeltaStats(const TPortionInfo& portionInfo) { TColumnEngineStats::TPortionsStats deltaStats; - THashSet blobs; + deltaStats.Bytes = 0; for (auto& rec : portionInfo.Records) { - metadataBytes += rec.GetMeta().GetMetadataSize(); - blobs.insert(rec.BlobRange.BlobId); deltaStats.BytesByColumn[rec.ColumnId] += rec.BlobRange.Size; - deltaStats.RawBytesByColumn[rec.ColumnId] += rec.GetMeta().GetRawBytes().value_or(0); + deltaStats.RawBytesByColumn[rec.ColumnId] += rec.GetMeta().GetRawBytes(); } deltaStats.Rows = portionInfo.NumRows(); - deltaStats.RawBytes = portionInfo.RawBytesSum(); - deltaStats.Bytes = 0; - for (auto& blobId : blobs) { - deltaStats.Bytes += blobId.BlobSize(); - } - deltaStats.Blobs = blobs.size(); + deltaStats.Bytes = portionInfo.GetTotalBlobBytes(); + deltaStats.RawBytes = portionInfo.GetTotalRawBytes(); + deltaStats.Blobs = portionInfo.GetBlobIdsCount(); deltaStats.Portions = 1; return deltaStats; } @@ -87,8 +89,7 @@ void TColumnEngineForLogs::UpdatePortionStats(TColumnEngineStats& engineStats, c EStatsUpdateType updateType, const TPortionInfo* exPortionInfo) const { ui64 columnRecords = portionInfo.Records.size(); - ui64 metadataBytes = 0; - TColumnEngineStats::TPortionsStats deltaStats = DeltaStats(portionInfo, metadataBytes); + TColumnEngineStats::TPortionsStats deltaStats = DeltaStats(portionInfo); Y_ABORT_UNLESS(!exPortionInfo || exPortionInfo->GetMeta().Produced != TPortionMeta::EProduced::UNSPECIFIED); Y_ABORT_UNLESS(portionInfo.GetMeta().Produced != TPortionMeta::EProduced::UNSPECIFIED); @@ -107,23 +108,19 @@ void TColumnEngineForLogs::UpdatePortionStats(TColumnEngineStats& engineStats, c if (isErase) { // PortionsToDrop engineStats.ColumnRecords -= columnRecords; - engineStats.ColumnMetadataBytes -= metadataBytes; stats -= deltaStats; } else if (isAdd) { // Load || AppendedPortions engineStats.ColumnRecords += columnRecords; - engineStats.ColumnMetadataBytes += metadataBytes; stats 
+= deltaStats; } else if (&srcStats != &stats || exPortionInfo) { // SwitchedPortions || PortionsToEvict stats += deltaStats; if (exPortionInfo) { - ui64 rmMetadataBytes = 0; - srcStats -= DeltaStats(*exPortionInfo, rmMetadataBytes); + srcStats -= DeltaStats(*exPortionInfo); engineStats.ColumnRecords += columnRecords - exPortionInfo->Records.size(); - engineStats.ColumnMetadataBytes += metadataBytes - rmMetadataBytes; } else { srcStats -= deltaStats; } @@ -135,7 +132,27 @@ void TColumnEngineForLogs::RegisterSchemaVersion(const TSnapshot& snapshot, TInd const NOlap::TIndexInfo& lastIndexInfo = VersionedIndex.GetLastSchema()->GetIndexInfo(); Y_ABORT_UNLESS(lastIndexInfo.CheckCompatible(indexInfo)); } + const bool isCriticalScheme = indexInfo.GetSchemeNeedActualization(); VersionedIndex.AddIndex(snapshot, std::move(indexInfo)); + if (isCriticalScheme) { + if (!ActualizationStarted) { + ActualizationStarted = true; + for (auto&& i : GranulesStorage->GetTables()) { + i.second->StartActualizationIndex(); + } + } + for (auto&& i : GranulesStorage->GetTables()) { + i.second->RefreshScheme(); + } + } +} + +void TColumnEngineForLogs::RegisterSchemaVersion(const TSnapshot& snapshot, const NKikimrSchemeOp::TColumnTableSchema& schema) { + std::optional indexInfoOptional = NOlap::TIndexInfo::BuildFromProto(schema, StoragesManager); + AFL_VERIFY(indexInfoOptional); + NOlap::TIndexInfo indexInfo = std::move(*indexInfoOptional); + indexInfo.SetAllKeys(StoragesManager); + RegisterSchemaVersion(snapshot, std::move(indexInfo)); } bool TColumnEngineForLogs::Load(IDbWrapper& db) { @@ -144,7 +161,7 @@ bool TColumnEngineForLogs::Load(IDbWrapper& db) { THashMap granuleToPathIdDecoder; { TMemoryProfileGuard g("TTxInit/LoadColumns"); - auto guard = GranulesStorage->StartPackModification(); + auto guard = GranulesStorage->GetStats()->StartPackModification(); if (!LoadColumns(db)) { return false; } @@ -153,11 +170,11 @@ bool TColumnEngineForLogs::Load(IDbWrapper& db) { } } - for (const auto& 
[pathId, spg] : Tables) { + for (const auto& [pathId, spg] : GranulesStorage->GetTables()) { for (const auto& [_, portionInfo] : spg->GetPortions()) { UpdatePortionStats(*portionInfo, EStatsUpdateType::ADD); if (portionInfo->CheckForCleanup()) { - CleanupPortions[portionInfo->GetRemoveSnapshot()].emplace_back(*portionInfo); + CleanupPortions[portionInfo->GetRemoveSnapshotVerified()].emplace_back(*portionInfo); } } } @@ -168,30 +185,26 @@ bool TColumnEngineForLogs::Load(IDbWrapper& db) { } bool TColumnEngineForLogs::LoadColumns(IDbWrapper& db) { - TSnapshot lastSnapshot(0, 0); - const TIndexInfo* currentIndexInfo = nullptr; + TPortionInfo::TSchemaCursor schema(VersionedIndex); if (!db.LoadColumns([&](const TPortionInfo& portion, const TColumnChunkLoadContext& loadContext) { - if (!currentIndexInfo || lastSnapshot != portion.GetMinSnapshot()) { - currentIndexInfo = &VersionedIndex.GetSchema(portion.GetMinSnapshot())->GetIndexInfo(); - lastSnapshot = portion.GetMinSnapshot(); - } + auto currentSchema = schema.GetSchema(portion); AFL_VERIFY(portion.ValidSnapshotInfo())("details", portion.DebugString()); // Locate granule and append the record. 
- TColumnRecord rec(loadContext, *currentIndexInfo); - GetGranulePtrVerified(portion.GetPathId())->AddColumnRecord(*currentIndexInfo, portion, rec, loadContext.GetPortionMeta()); + GetGranulePtrVerified(portion.GetPathId())->AddColumnRecordOnLoad(currentSchema->GetIndexInfo(), portion, loadContext, loadContext.GetPortionMeta()); })) { return false; } if (!db.LoadIndexes([&](const ui64 pathId, const ui64 portionId, const TIndexChunkLoadContext& loadContext) { - auto portion = GetGranulePtrVerified(pathId)->GetPortionPtr(portionId); + auto portion = GetGranulePtrVerified(pathId)->GetPortionOptional(portionId); AFL_VERIFY(portion); - portion->AddIndex(loadContext.BuildIndexChunk()); + const auto linkBlobId = portion->RegisterBlobId(loadContext.GetBlobRange().GetBlobId()); + portion->AddIndex(loadContext.BuildIndexChunk(linkBlobId)); })) { return false; }; - for (auto&& i : Tables) { + for (auto&& i : GranulesStorage->GetTables()) { i.second->OnAfterPortionsLoad(); } return true; @@ -221,9 +234,8 @@ bool TColumnEngineForLogs::LoadCounters(IDbWrapper& db) { std::shared_ptr TColumnEngineForLogs::StartInsert(std::vector&& dataToIndex) noexcept { Y_ABORT_UNLESS(dataToIndex.size()); - TSaverContext saverContext(StoragesManager->GetInsertOperator(), StoragesManager); - - auto changes = std::make_shared(std::move(dataToIndex), TSplitSettings(), saverContext); + TSaverContext saverContext(StoragesManager); + auto changes = std::make_shared(std::move(dataToIndex), saverContext); auto pkSchema = VersionedIndex.GetLastSchema()->GetIndexInfo().GetReplaceKey(); for (const auto& data : changes->GetDataToIndex()) { @@ -238,77 +250,123 @@ std::shared_ptr TColumnEngineForLogs::StartInsert(st return changes; } -std::shared_ptr TColumnEngineForLogs::StartCompaction(const TCompactionLimits& limits, const THashSet& busyPortions) noexcept { - THashSet busyGranuleIds; - for (auto&& i : busyPortions) { - busyGranuleIds.emplace(i.GetPathId()); - } - auto granule = 
GranulesStorage->GetGranuleForCompaction(Tables, busyGranuleIds); +std::shared_ptr TColumnEngineForLogs::StartCompaction(const std::shared_ptr& dataLocksManager) noexcept { + AFL_VERIFY(dataLocksManager); + auto granule = GranulesStorage->GetGranuleForCompaction(dataLocksManager); if (!granule) { AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "no granules for start compaction"); return nullptr; } granule->OnStartCompaction(); - auto changes = granule->GetOptimizationTask(limits, granule, busyPortions); - NYDBTest::TControllers::GetColumnShardController()->OnStartCompaction(changes); + auto changes = granule->GetOptimizationTask(granule, dataLocksManager); if (!changes) { AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "cannot build optimization task for granule that need compaction")("weight", granule->GetCompactionPriority().DebugString()); } return changes; } -std::shared_ptr TColumnEngineForLogs::StartCleanup(const TSnapshot& snapshot, - THashSet& pathsToDrop, ui32 /*maxRecords*/) noexcept { +std::shared_ptr TColumnEngineForLogs::StartCleanupTables(THashSet& pathsToDrop) noexcept { + if (pathsToDrop.empty()) { + return nullptr; + } + auto changes = std::make_shared(StoragesManager); + + ui64 txSize = 0; + const ui64 txSizeLimit = TGlobalLimits::TxWriteLimitBytes / 4; + THashSet pathsToRemove; + for (ui64 pathId : pathsToDrop) { + if (!HasDataInPathId(pathId)) { + changes->TablesToDrop.emplace(pathId); + } + txSize += 256; + if (txSize > txSizeLimit) { + break; + } + } + for (auto&& i : pathsToRemove) { + pathsToDrop.erase(i); + } + if (changes->TablesToDrop.empty()) { + return nullptr; + } + return changes; +} + +std::shared_ptr TColumnEngineForLogs::StartCleanupPortions(const TSnapshot& snapshot, + const THashSet& pathsToDrop, const std::shared_ptr& dataLocksManager) noexcept { + AFL_VERIFY(dataLocksManager); AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "StartCleanup")("portions_count", CleanupPortions.size()); - auto changes = 
std::make_shared(StoragesManager); + auto changes = std::make_shared(StoragesManager); // Add all portions from dropped paths - THashSet dropPortions; - THashSet emptyPaths; ui64 txSize = 0; const ui64 txSizeLimit = TGlobalLimits::TxWriteLimitBytes / 4; - changes->NeedRepeat = false; + ui32 skipLocked = 0; + ui32 portionsFromDrop = 0; + bool limitExceeded = false; + THashSet uniquePortions; for (ui64 pathId : pathsToDrop) { - auto itTable = Tables.find(pathId); - if (itTable == Tables.end()) { - emptyPaths.insert(pathId); + auto g = GranulesStorage->GetGranuleOptional(pathId); + if (!g) { continue; } - for (auto& [portion, info] : itTable->second->GetPortions()) { + for (auto& [portion, info] : g->GetPortions()) { + if (dataLocksManager->IsLocked(*info)) { + ++skipLocked; + continue; + } if (txSize + info->GetTxVolume() < txSizeLimit || changes->PortionsToDrop.empty()) { txSize += info->GetTxVolume(); } else { - changes->NeedRepeat = true; + limitExceeded = true; break; } + const auto inserted = uniquePortions.emplace(info->GetAddress()).second; + Y_ABORT_UNLESS(inserted); changes->PortionsToDrop.push_back(*info); - dropPortions.insert(portion); + ++portionsFromDrop; } } - for (ui64 pathId : emptyPaths) { - pathsToDrop.erase(pathId); - } - while (CleanupPortions.size() && !changes->NeedRepeat) { - auto it = CleanupPortions.begin(); + for (auto it = CleanupPortions.begin(); !limitExceeded && it != CleanupPortions.end();) { if (it->first >= snapshot) { AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "StartCleanupStop")("snapshot", snapshot.DebugString())("current_snapshot", it->first.DebugString()); break; } - for (auto&& i : it->second) { - Y_ABORT_UNLESS(i.CheckForCleanup(snapshot)); - if (txSize + i.GetTxVolume() < txSizeLimit || changes->PortionsToDrop.empty()) { - txSize += i.GetTxVolume(); - } else { - changes->NeedRepeat = true; - break; + for (ui32 i = 0; i < it->second.size();) { + if (dataLocksManager->IsLocked(it->second[i])) { + ++skipLocked; + ++i; + 
continue; + } + const auto inserted = uniquePortions.emplace(it->second[i].GetAddress()).second; + if (inserted) { + Y_ABORT_UNLESS(it->second[i].CheckForCleanup(snapshot)); + if (txSize + it->second[i].GetTxVolume() < txSizeLimit || changes->PortionsToDrop.empty()) { + txSize += it->second[i].GetTxVolume(); + } else { + limitExceeded = true; + break; + } + changes->PortionsToDrop.push_back(std::move(it->second[i])); } - changes->PortionsToDrop.push_back(i); + if (i + 1 < it->second.size()) { + it->second[i] = std::move(it->second.back()); + } + it->second.pop_back(); + } + if (limitExceeded) { + break; + } + if (it->second.empty()) { + it = CleanupPortions.erase(it); + } else { + ++it; } - CleanupPortions.erase(it); } - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "StartCleanup")("portions_count", CleanupPortions.size())("portions_prepared", changes->PortionsToDrop.size()); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "StartCleanup") + ("portions_count", CleanupPortions.size())("portions_prepared", changes->PortionsToDrop.size())("drop", portionsFromDrop)("skip", skipLocked); if (changes->PortionsToDrop.empty()) { return nullptr; @@ -317,170 +375,43 @@ std::shared_ptr TColumnEngineForLogs::StartCleanup( return changes; } -TDuration TColumnEngineForLogs::ProcessTiering(const ui64 pathId, const TTiering& ttl, TTieringProcessContext& context) const { - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "ProcessTiering")("path_id", pathId)("ttl", ttl.GetDebugString()); - auto& indexInfo = VersionedIndex.GetLastSchema()->GetIndexInfo(); - Y_ABORT_UNLESS(context.Changes->Tiering.emplace(pathId, ttl).second); - - TDuration dWaiting = NYDBTest::TControllers::GetColumnShardController()->GetTTLDefaultWaitingDuration(TDuration::Minutes(1)); - auto itTable = Tables.find(pathId); - if (itTable == Tables.end()) { - return dWaiting; - } - - std::optional expireTimestampOpt; - if (ttl.Ttl) { - expireTimestampOpt = ttl.Ttl->GetEvictInstant(context.Now); - } - - 
auto ttlColumnNames = ttl.GetTtlColumns(); - Y_ABORT_UNLESS(ttlColumnNames.size() == 1); // TODO: support different ttl columns - ui32 ttlColumnId = indexInfo.GetColumnId(*ttlColumnNames.begin()); - const TInstant now = TInstant::Now(); - for (auto& [portion, info] : itTable->second->GetPortions()) { - if (info->HasRemoveSnapshot()) { - continue; - } - if (context.BusyPortions.contains(info->GetAddress())) { - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "skip ttl through busy portion")("portion_id", info->GetAddress().DebugString()); - continue; - } +std::vector> TColumnEngineForLogs::StartTtl(const THashMap& pathEviction, const std::shared_ptr& dataLocksManager, + const ui64 memoryUsageLimit) noexcept { + AFL_VERIFY(dataLocksManager); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "StartTtl")("external", pathEviction.size()); - const bool tryEvictPortion = ttl.HasTiers() && context.HasLimitsForEviction(); - - if (auto max = info->MaxValue(ttlColumnId)) { - bool keep = !expireTimestampOpt; - if (expireTimestampOpt) { - auto mpiOpt = ttl.Ttl->ScalarToInstant(max); - Y_ABORT_UNLESS(mpiOpt); - const TInstant maxTtlPortionInstant = *mpiOpt; - const TDuration d = maxTtlPortionInstant - *expireTimestampOpt; - keep = !!d; - AFL_TRACE(NKikimrServices::TX_COLUMNSHARD)("event", "keep_detect")("max", maxTtlPortionInstant.Seconds())("expire", expireTimestampOpt->Seconds()); - if (d && dWaiting > d) { - dWaiting = d; - } - } - - AFL_TRACE(NKikimrServices::TX_COLUMNSHARD)("event", "scalar_less_result")("keep", keep)("tryEvictPortion", tryEvictPortion)("allowDrop", context.HasLimitsForTtl()); - if (keep && tryEvictPortion) { - const TString currentTierName = info->GetMeta().GetTierName() ? 
info->GetMeta().GetTierName() : IStoragesManager::DefaultStorageId; - TString tierName = ""; - const TInstant maxChangePortionInstant = info->RecordSnapshotMax().GetPlanInstant(); - if (now - maxChangePortionInstant < TDuration::Minutes(60)) { - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "skip_portion_to_evict")("reason", "too_fresh")("delta", now - maxChangePortionInstant); - continue; - } - for (auto& tierRef : ttl.GetOrderedTiers()) { - auto& tierInfo = tierRef.Get(); - if (!indexInfo.AllowTtlOverColumn(tierInfo.GetEvictColumnName())) { - SignalCounters.OnPortionNoTtlColumn(info->BlobsBytes()); - continue; - } - auto mpiOpt = tierInfo.ScalarToInstant(max); - Y_ABORT_UNLESS(mpiOpt); - const TInstant maxTieringPortionInstant = *mpiOpt; - - const TDuration d = tierInfo.GetEvictInstant(context.Now) - maxTieringPortionInstant; - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "tiering_choosing")("max", maxTieringPortionInstant.Seconds()) - ("evict", tierInfo.GetEvictInstant(context.Now).Seconds())("tier_name", tierInfo.GetName())("d", d); - if (d) { - tierName = tierInfo.GetName(); - break; - } else { - auto dWaitLocal = maxTieringPortionInstant - tierInfo.GetEvictInstant(context.Now); - if (dWaiting > dWaitLocal) { - dWaiting = dWaitLocal; - } - } - } - if (!tierName) { - tierName = IStoragesManager::DefaultStorageId; - } - if (currentTierName != tierName) { - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "tiering switch detected")("from", currentTierName)("to", tierName); - context.Changes->AddPortionToEvict(*info, TPortionEvictionFeatures(tierName, pathId, StoragesManager->GetOperator(tierName))); - context.AppPortionForEvictionChecker(*info); - SignalCounters.OnPortionToEvict(info->BlobsBytes()); - } - } - if (!keep && context.HasLimitsForTtl()) { - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "portion_remove")("portion", info->DebugString()); - AFL_VERIFY(context.Changes->PortionsToRemove.emplace(info->GetAddress(), *info).second); - 
SignalCounters.OnPortionToDrop(info->BlobsBytes()); - context.AppPortionForTtlChecker(*info); + TSaverContext saverContext(StoragesManager); + NActualizer::TTieringProcessContext context(memoryUsageLimit, saverContext, dataLocksManager, SignalCounters, ActualizationController); + for (auto&& i : pathEviction) { + auto g = GetGranuleOptional(i.first); + if (g) { + if (!ActualizationStarted) { + g->StartActualizationIndex(); } - } else { - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "scalar_less_not_max"); - SignalCounters.OnPortionNoBorder(info->BlobsBytes()); + g->RefreshTiering(i.second); + g->BuildActualizationTasks(context); } } - if (dWaiting > TDuration::MilliSeconds(500) && (!context.HasLimitsForEviction() || !context.HasLimitsForTtl())) { - dWaiting = TDuration::MilliSeconds(500); - } - Y_ABORT_UNLESS(!!dWaiting); - return dWaiting; -} -bool TColumnEngineForLogs::DrainEvictionQueue(std::map>& evictionsQueue, TTieringProcessContext& context) const { - const TMonotonic nowMonotonic = TlsActivationContext ? 
AppData()->MonotonicTimeProvider->Now() : TMonotonic::Now(); - bool hasChanges = false; - while (evictionsQueue.size() && evictionsQueue.begin()->first < nowMonotonic) { - hasChanges = true; - auto tierings = std::move(evictionsQueue.begin()->second); - evictionsQueue.erase(evictionsQueue.begin()); - for (auto&& i : tierings) { - auto itDuration = context.DurationsForced.find(i.GetPathId()); - if (itDuration == context.DurationsForced.end()) { - const TDuration dWaiting = ProcessTiering(i.GetPathId(), i.GetTieringInfo(), context); - evictionsQueue[nowMonotonic + dWaiting].emplace_back(std::move(i)); - } else { - evictionsQueue[nowMonotonic + itDuration->second].emplace_back(std::move(i)); + if (ActualizationStarted) { + TLogContextGuard lGuard(TLogContextBuilder::Build()("queue", "ttl")("external_count", pathEviction.size())); + for (auto&& i : GranulesStorage->GetTables()) { + if (pathEviction.contains(i.first)) { + continue; } - } - } - - if (evictionsQueue.size()) { - if (evictionsQueue.begin()->first < nowMonotonic) { - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "stop scan")("reason", "too many data")("first", evictionsQueue.begin()->first)("now", nowMonotonic); - } else if (!hasChanges) { - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "stop scan")("reason", "too early")("first", evictionsQueue.begin()->first)("now", nowMonotonic); - } else { - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "stop scan")("reason", "task_ready")("first", evictionsQueue.begin()->first)("now", nowMonotonic) - ("internal", hasChanges)("evict_portions", context.Changes->GetPortionsToEvictCount()) - ("drop_portions", context.Changes->PortionsToRemove.size()); + i.second->BuildActualizationTasks(context); } } else { - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "stop scan")("reason", "no data in queue"); - } - return hasChanges; -} - -std::shared_ptr TColumnEngineForLogs::StartTtl(const THashMap& pathEviction, const THashSet& busyPortions, const ui64 
memoryUsageLimit) noexcept { - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "StartTtl")("external", pathEviction.size()) - ("internal", EvictionsController.MutableNextCheckInstantForTierings().size()) - ; - - TSaverContext saverContext(StoragesManager->GetDefaultOperator(), StoragesManager); - - auto changes = std::make_shared(TSplitSettings(), saverContext); - - TTieringProcessContext context(memoryUsageLimit, changes, busyPortions, TTTLColumnEngineChanges::BuildMemoryPredictor()); - bool hasExternalChanges = false; - for (auto&& i : pathEviction) { - context.DurationsForced[i.first] = ProcessTiering(i.first, i.second, context); - hasExternalChanges = true; - } - - { - TLogContextGuard lGuard(TLogContextBuilder::Build()("queue", "ttl")("has_external", hasExternalChanges)); - DrainEvictionQueue(EvictionsController.MutableNextCheckInstantForTierings(), context); + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "StartTtl")("skip", "not_ready_tiers"); } - - if (changes->PortionsToRemove.empty() && !changes->GetPortionsToEvictCount()) { - return nullptr; + std::vector> result; + for (auto&& i : context.GetTasks()) { + for (auto&& t : i.second) { + SignalCounters.OnActualizationTask(t.GetTask()->GetPortionsToEvictCount(), t.GetTask()->PortionsToRemove.size()); + result.emplace_back(t.GetTask()); + } } - return changes; + return result; } bool TColumnEngineForLogs::ApplyChanges(IDbWrapper& db, std::shared_ptr indexChanges, const TSnapshot& snapshot) noexcept { @@ -488,10 +419,6 @@ bool TColumnEngineForLogs::ApplyChanges(IDbWrapper& db, std::shared_ptrCompile(context); } - { - TApplyChangesContext context(db, snapshot); - Y_ABORT_UNLESS(indexChanges->ApplyChanges(*this, context)); - } db.WriteCounter(LAST_PORTION, LastPortion); db.WriteCounter(LAST_GRANULE, LastGranule); @@ -504,10 +431,7 @@ bool TColumnEngineForLogs::ApplyChanges(IDbWrapper& db, std::shared_ptrsecond->IsErasable()); - Tables.erase(it); + GranulesStorage->EraseTable(pathId); } void 
TColumnEngineForLogs::UpsertPortion(const TPortionInfo& portionInfo, const TPortionInfo* exInfo) { @@ -525,7 +449,7 @@ bool TColumnEngineForLogs::ErasePortion(const TPortionInfo& portionInfo, bool up const ui64 portion = portionInfo.GetPortion(); auto spg = GetGranulePtrVerified(portionInfo.GetPathId()); Y_ABORT_UNLESS(spg); - auto p = spg->GetPortionPtr(portion); + auto p = spg->GetPortionOptional(portion); if (!p) { LOG_S_WARN("Portion erased already " << portionInfo << " at tablet " << TabletId); @@ -542,12 +466,11 @@ bool TColumnEngineForLogs::ErasePortion(const TPortionInfo& portionInfo, bool up std::shared_ptr TColumnEngineForLogs::Select(ui64 pathId, TSnapshot snapshot, const TPKRangesFilter& pkRangesFilter) const { auto out = std::make_shared(); - auto itTable = Tables.find(pathId); - if (itTable == Tables.end()) { + auto spg = GranulesStorage->GetGranuleOptional(pathId); + if (!spg) { return out; } - auto spg = itTable->second; for (const auto& [indexKey, keyPortions] : spg->GroupOrderedPortionsByPK()) { for (auto&& [_, portionInfo] : keyPortions) { if (!portionInfo->IsVisible(snapshot)) { @@ -567,42 +490,50 @@ std::shared_ptr TColumnEngineForLogs::Select(ui64 pathId, TSnapshot return out; } -void TColumnEngineForLogs::OnTieringModified(std::shared_ptr manager, const NColumnShard::TTtl& ttl) { - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "OnTieringModified"); - std::optional> tierings; - if (manager) { - tierings = manager->GetTiering(); +void TColumnEngineForLogs::OnTieringModified(const std::shared_ptr& manager, const NColumnShard::TTtl& ttl, const std::optional pathId) { + if (!ActualizationStarted) { + for (auto&& i : GranulesStorage->GetTables()) { + i.second->StartActualizationIndex(); + } } - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "OnTieringModified") - ("new_count_tierings", tierings ? 
::ToString(tierings->size()) : TString("undefined")) - ("new_count_ttls", ttl.PathsCount()); - EvictionsController.RefreshTierings(std::move(tierings), ttl); -} + ActualizationStarted = true; + AFL_VERIFY(manager); + THashMap tierings = manager->GetTiering(); + ttl.AddTtls(tierings); -void TColumnEngineForLogs::DoRegisterTable(const ui64 pathId) { - AFL_VERIFY(Tables.emplace(pathId, std::make_shared(pathId, GranulesStorage, SignalCounters.RegisterGranuleDataCounters(), VersionedIndex)).second); -} + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "OnTieringModified") + ("new_count_tierings", tierings.size()) + ("new_count_ttls", ttl.PathsCount()); + // some string -TColumnEngineForLogs::TTieringProcessContext::TTieringProcessContext(const ui64 memoryUsageLimit, - std::shared_ptr changes, const THashSet& busyPortions, const std::shared_ptr& memoryPredictor) - : MemoryUsageLimit(memoryUsageLimit) - , MemoryPredictor(memoryPredictor) - , Now(TlsActivationContext ? AppData()->TimeProvider->Now() : TInstant::Now()) - , Changes(changes) - , BusyPortions(busyPortions) -{ + if (pathId) { + auto g = GetGranulePtrVerified(*pathId); + auto it = tierings.find(*pathId); + if (it == tierings.end()) { + g->RefreshTiering({}); + } else { + g->RefreshTiering(it->second); + } + } else { + for (auto&& [gPathId, g] : GranulesStorage->GetTables()) { + auto it = tierings.find(gPathId); + if (it == tierings.end()) { + g->RefreshTiering({}); + } else { + g->RefreshTiering(it->second); + } + } + } } -void TEvictionsController::RefreshTierings(std::optional>&& tierings, const NColumnShard::TTtl& ttl) { - if (tierings) { - OriginalTierings = std::move(*tierings); +void TColumnEngineForLogs::DoRegisterTable(const ui64 pathId) { + std::shared_ptr g = GranulesStorage->RegisterTable(pathId, SignalCounters.RegisterGranuleDataCounters(), VersionedIndex); + if (ActualizationStarted) { + g->StartActualizationIndex(); + g->RefreshScheme(); } - auto copy = OriginalTierings; - ttl.AddTtls(copy); - 
NextCheckInstantForTierings = BuildNextInstantCheckers(std::move(copy)); - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "RefreshTierings")("count", NextCheckInstantForTierings.size()); } } // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/column_engine_logs.h b/ydb/core/tx/columnshard/engines/column_engine_logs.h index c281d9b653f8..d991a6878f7d 100644 --- a/ydb/core/tx/columnshard/engines/column_engine_logs.h +++ b/ydb/core/tx/columnshard/engines/column_engine_logs.h @@ -6,12 +6,12 @@ #include #include #include + +#include "changes/actualization/controller/controller.h" + #include "scheme/tier_info.h" #include "storage/granule.h" #include "storage/storage.h" -#include "changes/indexation.h" -#include "changes/ttl.h" -#include "changes/with_appended.h" namespace NKikimr::NArrow { struct TSortDescription; @@ -19,54 +19,18 @@ struct TSortDescription; namespace NKikimr::NOlap { -struct TReadMetadata; -class TGranulesTable; -class TColumnsTable; -class TCountersTable; +class TCompactColumnEngineChanges; +class TTTLColumnEngineChanges; +class TChangesWithAppend; +class TCompactColumnEngineChanges; +class TCleanupPortionsColumnEngineChanges; +class TCleanupTablesColumnEngineChanges; -class TEvictionsController { -public: - class TTieringWithPathId { - private: - const ui64 PathId; - TTiering TieringInfo; - public: - TTieringWithPathId(const ui64 pathId, TTiering&& tieringInfo) - : PathId(pathId) - , TieringInfo(std::move(tieringInfo)) - { - - } - - ui64 GetPathId() const { - return PathId; - } - - const TTiering& GetTieringInfo() const { - return TieringInfo; - } - }; -private: - THashMap OriginalTierings; - std::map> NextCheckInstantForTierings; - - std::map> BuildNextInstantCheckers(THashMap&& info) { - std::map> result; - std::vector newTasks; - for (auto&& i : info) { - newTasks.emplace_back(i.first, std::move(i.second)); - } - result.emplace(TMonotonic::Zero(), std::move(newTasks)); - return result; - } -public: - std::map>& 
MutableNextCheckInstantForTierings() { - return NextCheckInstantForTierings; - } - - void RefreshTierings(std::optional>&& tierings, const NColumnShard::TTtl& ttl); -}; +namespace NDataSharing { +class TDestinationSession; +} +struct TReadMetadata; /// Engine with 2 tables: /// - Granules: PK -> granules (use part of PK) @@ -78,48 +42,22 @@ class TColumnEngineForLogs : public IColumnEngine { friend class TTTLColumnEngineChanges; friend class TChangesWithAppend; friend class TCompactColumnEngineChanges; - friend class TCleanupColumnEngineChanges; + friend class TCleanupPortionsColumnEngineChanges; + friend class TCleanupTablesColumnEngineChanges; + friend class NDataSharing::TDestinationSession; private: + bool ActualizationStarted = false; const NColumnShard::TEngineLogsCounters SignalCounters; std::shared_ptr GranulesStorage; std::shared_ptr StoragesManager; - TEvictionsController EvictionsController; - class TTieringProcessContext { - private: - const ui64 MemoryUsageLimit; - ui64 MemoryUsage = 0; - ui64 TxWriteVolume = 0; - std::shared_ptr MemoryPredictor; - public: - const TInstant Now; - std::shared_ptr Changes; - std::map DurationsForced; - const THashSet& BusyPortions; - - void AppPortionForEvictionChecker(const TPortionInfo& info) { - MemoryUsage = MemoryPredictor->AddPortion(info); - TxWriteVolume += info.GetTxVolume(); - } - - void AppPortionForTtlChecker(const TPortionInfo& info) { - TxWriteVolume += info.GetTxVolume(); - } - - bool HasLimitsForEviction() const { - return MemoryUsage < MemoryUsageLimit && TxWriteVolume < TGlobalLimits::TxWriteLimitBytes; - } - - bool HasLimitsForTtl() const { - return TxWriteVolume < TGlobalLimits::TxWriteLimitBytes; - } - - TTieringProcessContext(const ui64 memoryUsageLimit, std::shared_ptr changes, - const THashSet& busyPortions, const std::shared_ptr& memoryPredictor); - }; - TDuration ProcessTiering(const ui64 pathId, const TTiering& tiering, TTieringProcessContext& context) const; - bool 
DrainEvictionQueue(std::map>& evictionsQueue, TTieringProcessContext& context) const; + std::shared_ptr ActualizationController; + public: + const std::shared_ptr& GetActualizationController() const { + return ActualizationController; + } + ui64* GetLastPortionPointer() { return &LastPortion; } @@ -141,9 +79,14 @@ class TColumnEngineForLogs : public IColumnEngine { ADD, }; - TColumnEngineForLogs(ui64 tabletId, const TCompactionLimits& limits, const std::shared_ptr& storagesManager); + TColumnEngineForLogs(ui64 tabletId, const std::shared_ptr& storagesManager, const TSnapshot& snapshot, const NKikimrSchemeOp::TColumnTableSchema& schema); + TColumnEngineForLogs(ui64 tabletId, const std::shared_ptr& storagesManager, const TSnapshot& snapshot, TIndexInfo&& schema); + + virtual void OnTieringModified(const std::shared_ptr& manager, const NColumnShard::TTtl& ttl, const std::optional pathId) override; - virtual void OnTieringModified(std::shared_ptr manager, const NColumnShard::TTtl& ttl) override; + virtual std::shared_ptr CopyVersionedIndexPtr() const override { + return std::make_shared(VersionedIndex); + } const TVersionedIndex& GetVersionedIndex() const override { return VersionedIndex; @@ -151,33 +94,37 @@ class TColumnEngineForLogs : public IColumnEngine { const TMap>& GetStats() const override; const TColumnEngineStats& GetTotalStats() override; - ui64 MemoryUsage() const override; TSnapshot LastUpdate() const override { return LastSnapshot; } virtual void DoRegisterTable(const ui64 pathId) override; public: bool Load(IDbWrapper& db) override; - std::shared_ptr StartInsert(std::vector&& dataToIndex) noexcept override; - std::shared_ptr StartCompaction(const TCompactionLimits& limits, const THashSet& busyPortions) noexcept override; - std::shared_ptr StartCleanup(const TSnapshot& snapshot, THashSet& pathsToDrop, ui32 maxRecords) noexcept override; - std::shared_ptr StartTtl(const THashMap& pathEviction, - const THashSet& busyPortions, const ui64 memoryUsageLimit) 
noexcept override; + virtual bool IsOverloadedByMetadata(const ui64 limit) const override { + return limit < TGranulesStat::GetSumMetadataMemoryPortionsSize(); + } + std::shared_ptr StartInsert(std::vector&& dataToIndex) noexcept override; + std::shared_ptr StartCompaction(const std::shared_ptr& dataLocksManager) noexcept override; + std::shared_ptr StartCleanupPortions(const TSnapshot& snapshot, const THashSet& pathsToDrop, const std::shared_ptr& dataLocksManager) noexcept override; + std::shared_ptr StartCleanupTables(THashSet& pathsToDrop) noexcept override; + std::vector> StartTtl(const THashMap& pathEviction, + const std::shared_ptr& locksManager, const ui64 memoryUsageLimit) noexcept override; + + void ReturnToIndexes(const THashMap>& portions) const { + return GranulesStorage->ReturnToIndexes(portions); + } bool ApplyChanges(IDbWrapper& db, std::shared_ptr indexChanges, const TSnapshot& snapshot) noexcept override; void RegisterSchemaVersion(const TSnapshot& snapshot, TIndexInfo&& info) override; + void RegisterSchemaVersion(const TSnapshot& snapshot, const NKikimrSchemeOp::TColumnTableSchema& schema) override; std::shared_ptr Select(ui64 pathId, TSnapshot snapshot, const TPKRangesFilter& pkRangesFilter) const override; bool IsPortionExists(const ui64 pathId, const ui64 portionId) const { - auto it = Tables.find(pathId); - if (it == Tables.end()) { - return false; - } - return !!it->second->GetPortionPtr(portionId); + return !!GranulesStorage->GetPortionOptional(pathId, portionId); } virtual bool HasDataInPathId(const ui64 pathId) const override { @@ -200,22 +147,11 @@ class TColumnEngineForLogs : public IColumnEngine { } std::shared_ptr GetGranuleOptional(const ui64 pathId) const { - auto it = Tables.find(pathId); - if (it == Tables.end()) { - return nullptr; - } - return it->second; + return GranulesStorage->GetGranuleOptional(pathId); } - std::vector> GetTables(const ui64 pathIdFrom, const ui64 pathIdTo) const { - std::vector> result; - for (auto&& i : 
Tables) { - if (i.first < pathIdFrom || i.first > pathIdTo) { - continue; - } - result.emplace_back(i.second); - } - return result; + std::vector> GetTables(const std::optional pathIdFrom, const std::optional pathIdTo) const { + return GranulesStorage->GetTables(pathIdFrom, pathIdTo); } ui64 GetTabletId() const { @@ -225,9 +161,6 @@ class TColumnEngineForLogs : public IColumnEngine { private: TVersionedIndex VersionedIndex; ui64 TabletId; - std::shared_ptr ColumnsTable; - std::shared_ptr CountersTable; - THashMap> Tables; // pathId into Granule that equal to Table TMap> PathStats; // per path_id stats sorted by path_id std::map> CleanupPortions; TColumnEngineStats Counters; diff --git a/ydb/core/tx/columnshard/engines/db_wrapper.cpp b/ydb/core/tx/columnshard/engines/db_wrapper.cpp index 838a082af52d..4bfd6be1554b 100644 --- a/ydb/core/tx/columnshard/engines/db_wrapper.cpp +++ b/ydb/core/tx/columnshard/engines/db_wrapper.cpp @@ -48,11 +48,12 @@ void TDbWrapper::WriteColumn(const NOlap::TPortionInfo& portion, const TColumnRe *rowProto.MutablePortionMeta() = std::move(*proto); } using IndexColumns = NColumnShard::Schema::IndexColumns; + auto removeSnapshot = portion.GetRemoveSnapshotOptional(); db.Table().Key(0, portion.GetDeprecatedGranuleId(), row.ColumnId, - portion.GetMinSnapshot().GetPlanStep(), portion.GetMinSnapshot().GetTxId(), portion.GetPortion(), row.Chunk).Update( - NIceDb::TUpdate(portion.GetRemoveSnapshot().GetPlanStep()), - NIceDb::TUpdate(portion.GetRemoveSnapshot().GetTxId()), - NIceDb::TUpdate(row.SerializedBlobId()), + portion.GetMinSnapshotDeprecated().GetPlanStep(), portion.GetMinSnapshotDeprecated().GetTxId(), portion.GetPortion(), row.Chunk).Update( + NIceDb::TUpdate(removeSnapshot ? removeSnapshot->GetPlanStep() : 0), + NIceDb::TUpdate(removeSnapshot ? 
removeSnapshot->GetTxId() : 0), + NIceDb::TUpdate(portion.GetBlobId(row.GetBlobRange().GetBlobIdxVerified()).SerializeBinary()), NIceDb::TUpdate(rowProto.SerializeAsString()), NIceDb::TUpdate(row.BlobRange.Offset), NIceDb::TUpdate(row.BlobRange.Size), @@ -64,7 +65,7 @@ void TDbWrapper::EraseColumn(const NOlap::TPortionInfo& portion, const TColumnRe NIceDb::TNiceDb db(Database); using IndexColumns = NColumnShard::Schema::IndexColumns; db.Table().Key(0, portion.GetDeprecatedGranuleId(), row.ColumnId, - portion.GetMinSnapshot().GetPlanStep(), portion.GetMinSnapshot().GetTxId(), portion.GetPortion(), row.Chunk).Delete(); + portion.GetMinSnapshotDeprecated().GetPlanStep(), portion.GetMinSnapshotDeprecated().GetTxId(), portion.GetPortion(), row.Chunk).Delete(); } bool TDbWrapper::LoadColumns(const std::function& callback) { @@ -78,7 +79,7 @@ bool TDbWrapper::LoadColumns(const std::function()); - portion.SetMinSnapshot(rowset.GetValue(), rowset.GetValue()); + portion.SetMinSnapshotDeprecated(NOlap::TSnapshot(rowset.GetValue(), rowset.GetValue())); portion.SetPortion(rowset.GetValue()); portion.SetDeprecatedGranuleId(rowset.GetValue()); @@ -100,7 +101,7 @@ void TDbWrapper::WriteIndex(const TPortionInfo& portion, const TIndexChunk& row) using IndexIndexes = NColumnShard::Schema::IndexIndexes; NIceDb::TNiceDb db(Database); db.Table().Key(portion.GetPathId(), portion.GetPortionId(), row.GetIndexId(), row.GetChunkIdx()).Update( - NIceDb::TUpdate(row.GetBlobRange().BlobId.SerializeBinary()), + NIceDb::TUpdate(portion.GetBlobId(row.GetBlobRange().GetBlobIdxVerified()).SerializeBinary()), NIceDb::TUpdate(row.GetBlobRange().Offset), NIceDb::TUpdate(row.GetBlobRange().Size), NIceDb::TUpdate(row.GetRecordsCount()), diff --git a/ydb/core/tx/columnshard/engines/db_wrapper.h b/ydb/core/tx/columnshard/engines/db_wrapper.h index 743f43272b20..6e23ee5a67a0 100644 --- a/ydb/core/tx/columnshard/engines/db_wrapper.h +++ b/ydb/core/tx/columnshard/engines/db_wrapper.h @@ -1,5 +1,6 @@ #pragma 
once #include "defs.h" +#include namespace NKikimr::NTable { class TDatabase; @@ -11,7 +12,7 @@ class TColumnChunkLoadContext; class TIndexChunkLoadContext; struct TInsertedData; class TInsertTableAccessor; -struct TColumnRecord; +class TColumnRecord; class TIndexChunk; struct TGranuleRecord; class IColumnEngine; diff --git a/ydb/core/tx/columnshard/engines/defs.h b/ydb/core/tx/columnshard/engines/defs.h index cb0ca8ee6ae7..a01edc7ef767 100644 --- a/ydb/core/tx/columnshard/engines/defs.h +++ b/ydb/core/tx/columnshard/engines/defs.h @@ -15,14 +15,6 @@ inline TWriteId operator++(TWriteId& w) noexcept { return w; } -class IBlobGroupSelector { -protected: - virtual ~IBlobGroupSelector() = default; - -public: - virtual ui32 GetGroup(const TLogoBlobID& blobId) const = 0; -}; - } // namespace NKikimr::NOlap template <> diff --git a/ydb/core/tx/columnshard/engines/filter.cpp b/ydb/core/tx/columnshard/engines/filter.cpp index bcfa6c7f109f..67dfb8e5ae71 100644 --- a/ydb/core/tx/columnshard/engines/filter.cpp +++ b/ydb/core/tx/columnshard/engines/filter.cpp @@ -1,6 +1,6 @@ #include "filter.h" #include "defs.h" -#include "reader/read_metadata.h" +#include "scheme/abstract/index_info.h" #include #include @@ -82,8 +82,8 @@ class TTableSnapshotGetter { template NArrow::TColumnFilter MakeSnapshotFilterImpl(const std::shared_ptr& batch, const TSnapshot& snapshot) { Y_ABORT_UNLESS(batch); - auto steps = batch->GetColumnByName(TIndexInfo::SPEC_COL_PLAN_STEP); - auto ids = batch->GetColumnByName(TIndexInfo::SPEC_COL_TX_ID); + auto steps = batch->GetColumnByName(IIndexInfo::SPEC_COL_PLAN_STEP); + auto ids = batch->GetColumnByName(IIndexInfo::SPEC_COL_TX_ID); NArrow::TColumnFilter result = NArrow::TColumnFilter::BuildAllowFilter(); TGetter getter(steps, ids, snapshot); result.Reset(steps->length(), std::move(getter)); @@ -123,18 +123,4 @@ NArrow::TColumnFilter MakeSnapshotFilter(const std::shared_ptr(batch, snapshot); } -NArrow::TColumnFilter FilterPortion(const std::shared_ptr& 
portion, const TReadMetadata& readMetadata, const bool useSnapshotFilter) { - Y_ABORT_UNLESS(portion); - NArrow::TColumnFilter result = readMetadata.GetPKRangesFilter().BuildFilter(portion); - if (readMetadata.GetSnapshot().GetPlanStep() && useSnapshotFilter) { - result = result.And(MakeSnapshotFilter(portion, readMetadata.GetSnapshot())); - } - - return result; -} - -NArrow::TColumnFilter FilterNotIndexed(const std::shared_ptr& batch, const TReadMetadata& readMetadata) { - return readMetadata.GetPKRangesFilter().BuildFilter(batch); -} - } diff --git a/ydb/core/tx/columnshard/engines/filter.h b/ydb/core/tx/columnshard/engines/filter.h index 22429208a8d0..7670b0eab1d3 100644 --- a/ydb/core/tx/columnshard/engines/filter.h +++ b/ydb/core/tx/columnshard/engines/filter.h @@ -11,7 +11,4 @@ NArrow::TColumnFilter MakeSnapshotFilter(const std::shared_ptr& batch, const TSnapshot& snapshot); struct TReadMetadata; -NArrow::TColumnFilter FilterPortion(const std::shared_ptr& batch, const TReadMetadata& readMetadata, const bool useSnapshotFilter); -NArrow::TColumnFilter FilterNotIndexed(const std::shared_ptr& batch, const TReadMetadata& readMetadata); - } // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/insert_table/data.h b/ydb/core/tx/columnshard/engines/insert_table/data.h index ca726640b24d..c6cea97be5a6 100644 --- a/ydb/core/tx/columnshard/engines/insert_table/data.h +++ b/ydb/core/tx/columnshard/engines/insert_table/data.h @@ -24,6 +24,7 @@ struct TInsertedData { }; std::shared_ptr BlobDataGuard; + public: ui64 PlanStep = 0; ui64 WriteTxId = 0; @@ -41,6 +42,10 @@ struct TInsertedData { } } + ui64 GetTxVolume() const { + return Meta.GetTxVolume() + sizeof(TBlobRange); + } + const TInsertedDataMeta& GetMeta() const { return Meta; } @@ -127,7 +132,8 @@ class TCommittedBlob { private: TBlobRange BlobRange; TSnapshot CommitSnapshot; - YDB_READONLY_DEF(ui64, SchemaVersion); + YDB_READONLY(ui64, SchemaVersion, 0); + YDB_READONLY(ui64, RecordsCount, 0); 
YDB_READONLY_DEF(std::optional, First); YDB_READONLY_DEF(std::optional, Last); public: @@ -145,15 +151,16 @@ class TCommittedBlob { return *Last; } - TCommittedBlob(const TBlobRange& blobRange, const TSnapshot& snapshot, const ui64 schemaVersion, const std::optional& first, const std::optional& last) + TCommittedBlob(const TBlobRange& blobRange, const TSnapshot& snapshot, const ui64 schemaVersion, const ui64 recordsCount, const std::optional& first, const std::optional& last) : BlobRange(blobRange) , CommitSnapshot(snapshot) , SchemaVersion(schemaVersion) + , RecordsCount(recordsCount) , First(first) , Last(last) {} - /// It uses trick then we place key wtih planStep:txId in container and find them later by BlobId only. + /// It uses trick then we place key with planStep:txId in container and find them later by BlobId only. /// So hash() and equality should depend on BlobId only. bool operator == (const TCommittedBlob& key) const { return BlobRange == key.BlobRange; } ui64 Hash() const noexcept { return BlobRange.Hash(); } diff --git a/ydb/core/tx/columnshard/engines/insert_table/insert_table.cpp b/ydb/core/tx/columnshard/engines/insert_table/insert_table.cpp index 02eaedbeb918..91af9a46140d 100644 --- a/ydb/core/tx/columnshard/engines/insert_table/insert_table.cpp +++ b/ydb/core/tx/columnshard/engines/insert_table/insert_table.cpp @@ -79,17 +79,29 @@ THashSet TInsertTable::DropPath(IDbWrapper& dbTable, ui64 pathId) { return Summary.GetInsertedByPathId(pathId); } -void TInsertTable::EraseCommitted(IDbWrapper& dbTable, const TInsertedData& data, const std::shared_ptr& blobsAction) { - if (Summary.EraseCommitted(data)) { - RemoveBlobLink(data.GetBlobRange().BlobId, blobsAction); +void TInsertTable::EraseCommittedOnExecute(IDbWrapper& dbTable, const TInsertedData& data, const std::shared_ptr& blobsAction) { + if (Summary.HasCommitted(data)) { dbTable.EraseCommitted(data); + RemoveBlobLinkOnExecute(data.GetBlobRange().BlobId, blobsAction); } } -void 
TInsertTable::EraseAborted(IDbWrapper& dbTable, const TInsertedData& data, const std::shared_ptr& blobsAction) { - if (Summary.EraseAborted((TWriteId)data.WriteTxId)) { - RemoveBlobLink(data.GetBlobRange().BlobId, blobsAction); +void TInsertTable::EraseCommittedOnComplete(const TInsertedData& data) { + if (Summary.EraseCommitted(data)) { + RemoveBlobLinkOnComplete(data.GetBlobRange().BlobId); + } +} + +void TInsertTable::EraseAbortedOnExecute(IDbWrapper& dbTable, const TInsertedData& data, const std::shared_ptr& blobsAction) { + if (Summary.HasAborted((TWriteId)data.WriteTxId)) { dbTable.EraseAborted(data); + RemoveBlobLinkOnExecute(data.GetBlobRange().BlobId, blobsAction); + } +} + +void TInsertTable::EraseAbortedOnComplete(const TInsertedData& data) { + if (Summary.EraseAborted((TWriteId)data.WriteTxId)) { + RemoveBlobLinkOnComplete(data.GetBlobRange().BlobId); } } @@ -121,19 +133,30 @@ std::vector TInsertTable::Read(ui64 pathId, const TSnapshot& sna std::vector result; result.reserve(ret.size()); for (auto&& i : ret) { - result.emplace_back(TCommittedBlob(i->GetBlobRange(), i->GetSnapshot(), i->GetSchemaVersion(), i->GetMeta().GetFirstPK(pkSchema), i->GetMeta().GetLastPK(pkSchema))); + result.emplace_back(TCommittedBlob(i->GetBlobRange(), i->GetSnapshot(), i->GetSchemaVersion(), i->GetMeta().GetNumRows(), i->GetMeta().GetFirstPK(pkSchema), i->GetMeta().GetLastPK(pkSchema))); } return result; } -bool TInsertTableAccessor::RemoveBlobLink(const TUnifiedBlobId& blobId, const std::shared_ptr& blobsAction) { +bool TInsertTableAccessor::RemoveBlobLinkOnExecute(const TUnifiedBlobId& blobId, const std::shared_ptr& blobsAction) { AFL_VERIFY(blobsAction); auto itBlob = BlobLinks.find(blobId); AFL_VERIFY(itBlob != BlobLinks.end()); AFL_VERIFY(itBlob->second >= 1); if (itBlob->second == 1) { - blobsAction->DeclareRemove(itBlob->first); + blobsAction->DeclareSelfRemove(itBlob->first); + return true; + } else { + return false; + } +} + +bool 
TInsertTableAccessor::RemoveBlobLinkOnComplete(const TUnifiedBlobId& blobId) { + auto itBlob = BlobLinks.find(blobId); + AFL_VERIFY(itBlob != BlobLinks.end()); + AFL_VERIFY(itBlob->second >= 1); + if (itBlob->second == 1) { BlobLinks.erase(itBlob); return true; } else { diff --git a/ydb/core/tx/columnshard/engines/insert_table/insert_table.h b/ydb/core/tx/columnshard/engines/insert_table/insert_table.h index e7d523610fb2..373964818370 100644 --- a/ydb/core/tx/columnshard/engines/insert_table/insert_table.h +++ b/ydb/core/tx/columnshard/engines/insert_table/insert_table.h @@ -21,12 +21,24 @@ class TInsertTableAccessor { ++BlobLinks[blobId]; } - bool RemoveBlobLink(const TUnifiedBlobId& blobId, const std::shared_ptr& blobsAction); + bool RemoveBlobLinkOnExecute(const TUnifiedBlobId& blobId, const std::shared_ptr& blobsAction); + bool RemoveBlobLinkOnComplete(const TUnifiedBlobId& blobId); public: const std::map>& GetPathPriorities() const { return Summary.GetPathPriorities(); } + std::optional GetMinCommittedSnapshot(const ui64 pathId) const { + auto* info = Summary.GetPathInfoOptional(pathId); + if (!info) { + return {}; + } else if (info->GetCommitted().empty()) { + return {}; + } else { + return info->GetCommitted().begin()->GetSnapshot(); + } + } + bool AddInserted(TInsertedData&& data, const bool load) { if (load) { AddBlobLink(data.GetBlobRange().BlobId); @@ -72,8 +84,13 @@ class TInsertTable: public TInsertTableAccessor { void Abort(IDbWrapper& dbTable, const THashSet& writeIds); THashSet OldWritesToAbort(const TInstant& now) const; THashSet DropPath(IDbWrapper& dbTable, ui64 pathId); - void EraseCommitted(IDbWrapper& dbTable, const TInsertedData& key, const std::shared_ptr& blobsAction); - void EraseAborted(IDbWrapper& dbTable, const TInsertedData& key, const std::shared_ptr& blobsAction); + + void EraseCommittedOnExecute(IDbWrapper& dbTable, const TInsertedData& key, const std::shared_ptr& blobsAction); + void EraseCommittedOnComplete(const TInsertedData& 
key); + + void EraseAbortedOnExecute(IDbWrapper& dbTable, const TInsertedData& key, const std::shared_ptr& blobsAction); + void EraseAbortedOnComplete(const TInsertedData& key); + std::vector Read(ui64 pathId, const TSnapshot& snapshot, const std::shared_ptr& pkSchema) const; bool Load(IDbWrapper& dbTable, const TInstant loadTime); }; diff --git a/ydb/core/tx/columnshard/engines/insert_table/meta.h b/ydb/core/tx/columnshard/engines/insert_table/meta.h index 40c7d59dd1c3..b513288eb787 100644 --- a/ydb/core/tx/columnshard/engines/insert_table/meta.h +++ b/ydb/core/tx/columnshard/engines/insert_table/meta.h @@ -20,6 +20,10 @@ class TInsertedDataMeta { const std::optional& GetSpecialKeys() const; public: + ui64 GetTxVolume() const { + return 2 * sizeof(ui64) + sizeof(ui32) + sizeof(OriginalProto) + (SpecialKeysParsed ? SpecialKeysParsed->GetMemoryBytes() : 0); + } + TInsertedDataMeta(const NKikimrTxColumnShard::TLogicalMetadata& proto) : OriginalProto(proto) { diff --git a/ydb/core/tx/columnshard/engines/insert_table/path_info.cpp b/ydb/core/tx/columnshard/engines/insert_table/path_info.cpp index 4c46193e1537..31be2b8a8fa3 100644 --- a/ydb/core/tx/columnshard/engines/insert_table/path_info.cpp +++ b/ydb/core/tx/columnshard/engines/insert_table/path_info.cpp @@ -39,6 +39,10 @@ bool TPathInfo::EraseCommitted(const TInsertedData& data) { return result; } +bool TPathInfo::HasCommitted(const TInsertedData& data) { + return Committed.contains(data); +} + bool TPathInfo::AddCommitted(TInsertedData&& data, const bool load) { const ui64 dataSize = data.BlobSize(); Summary->RemovePriority(*this); diff --git a/ydb/core/tx/columnshard/engines/insert_table/path_info.h b/ydb/core/tx/columnshard/engines/insert_table/path_info.h index 242126faf3d2..5e44929307c4 100644 --- a/ydb/core/tx/columnshard/engines/insert_table/path_info.h +++ b/ydb/core/tx/columnshard/engines/insert_table/path_info.h @@ -62,6 +62,7 @@ class TPathInfo: public TMoveOnly { TPathInfoIndexPriority 
GetIndexationPriority() const; bool EraseCommitted(const TInsertedData& data); + bool HasCommitted(const TInsertedData& data); const TSet& GetCommitted() const { return Committed; diff --git a/ydb/core/tx/columnshard/engines/insert_table/rt_insertion.cpp b/ydb/core/tx/columnshard/engines/insert_table/rt_insertion.cpp index 84eae30941f5..4723ac8da5c6 100644 --- a/ydb/core/tx/columnshard/engines/insert_table/rt_insertion.cpp +++ b/ydb/core/tx/columnshard/engines/insert_table/rt_insertion.cpp @@ -140,6 +140,14 @@ bool TInsertionSummary::EraseAborted(const TWriteId writeId) { return true; } +bool TInsertionSummary::HasAborted(const TWriteId writeId) { + auto it = Aborted.find(writeId); + if (it == Aborted.end()) { + return false; + } + return true; +} + bool TInsertionSummary::EraseCommitted(const TInsertedData& data) { TPathInfo* pathInfo = GetPathInfoOptional(data.PathId); if (!pathInfo) { @@ -155,6 +163,14 @@ bool TInsertionSummary::EraseCommitted(const TInsertedData& data) { } } +bool TInsertionSummary::HasCommitted(const TInsertedData& data) { + TPathInfo* pathInfo = GetPathInfoOptional(data.PathId); + if (!pathInfo) { + return false; + } + return pathInfo->HasCommitted(data); +} + const NKikimr::NOlap::TInsertedData* TInsertionSummary::AddAborted(TInsertedData&& data, const bool load /*= false*/) { const TWriteId writeId((TWriteId)data.WriteTxId); Counters.Aborted.Add(data.BlobSize(), load); diff --git a/ydb/core/tx/columnshard/engines/insert_table/rt_insertion.h b/ydb/core/tx/columnshard/engines/insert_table/rt_insertion.h index 3ae529fca4fc..291886bd81bd 100644 --- a/ydb/core/tx/columnshard/engines/insert_table/rt_insertion.h +++ b/ydb/core/tx/columnshard/engines/insert_table/rt_insertion.h @@ -47,8 +47,10 @@ class TInsertionSummary { const TInsertedData* AddAborted(TInsertedData&& data, const bool load = false); bool EraseAborted(const TWriteId writeId); + bool HasAborted(const TWriteId writeId); bool EraseCommitted(const TInsertedData& data); + bool 
HasCommitted(const TInsertedData& data); const TInsertedData* AddInserted(TInsertedData&& data, const bool load = false); std::optional ExtractInserted(const TWriteId id); diff --git a/ydb/core/tx/columnshard/engines/portions/column_record.cpp b/ydb/core/tx/columnshard/engines/portions/column_record.cpp index 07d9eb3bd397..e4fbef70c5fc 100644 --- a/ydb/core/tx/columnshard/engines/portions/column_record.cpp +++ b/ydb/core/tx/columnshard/engines/portions/column_record.cpp @@ -1,38 +1,42 @@ #include "column_record.h" + #include + +#include #include #include #include namespace NKikimr::NOlap { -TChunkMeta::TChunkMeta(const TColumnChunkLoadContext& context, const TIndexInfo& indexInfo) { - auto field = indexInfo.ArrowColumnFieldOptional(context.GetAddress().GetColumnId()); - if (context.GetMetaProto().HasNumRows()) { - NumRows = context.GetMetaProto().GetNumRows(); +TConclusionStatus TChunkMeta::DeserializeFromProto(const TChunkAddress& address, const NKikimrTxColumnShard::TIndexColumnMeta& proto, const TSimpleColumnInfo& columnInfo) { + auto field = columnInfo.GetArrowField(); + if (proto.HasNumRows()) { + NumRows = proto.GetNumRows(); } - if (context.GetMetaProto().HasRawBytes()) { - RawBytes = context.GetMetaProto().GetRawBytes(); + if (proto.HasRawBytes()) { + RawBytes = proto.GetRawBytes(); } - if (context.GetMetaProto().HasMaxValue()) { - AFL_VERIFY(field)("field_id", context.GetAddress().GetColumnId())("field_name", indexInfo.GetColumnName(context.GetAddress().GetColumnId())); - Max = ConstantToScalar(context.GetMetaProto().GetMaxValue(), field->type()); + if (proto.HasMaxValue()) { + AFL_VERIFY(field)("field_id", address.GetColumnId())("field_name", columnInfo.GetColumnName()); + Max = ConstantToScalar(proto.GetMaxValue(), field->type()); } + return TConclusionStatus::Success(); +} + +TChunkMeta::TChunkMeta(const TColumnChunkLoadContext& context, const TSimpleColumnInfo& columnInfo) { + DeserializeFromProto(context.GetAddress(), context.GetMetaProto(), 
columnInfo).Validate(); } -TChunkMeta::TChunkMeta(const std::shared_ptr& column, const ui32 columnId, const TIndexInfo& indexInfo) - : TBase(column, indexInfo.GetMinMaxIdxColumns().contains(columnId), indexInfo.IsSortedColumn(columnId)) +TChunkMeta::TChunkMeta(const std::shared_ptr& column, const TSimpleColumnInfo& columnInfo) + : TBase(column, columnInfo.GetNeedMinMax(), columnInfo.GetIsSorted()) { } NKikimrTxColumnShard::TIndexColumnMeta TChunkMeta::SerializeToProto() const { NKikimrTxColumnShard::TIndexColumnMeta meta; - if (NumRows) { - meta.SetNumRows(*NumRows); - } - if (RawBytes) { - meta.SetRawBytes(*RawBytes); - } + meta.SetNumRows(NumRows); + meta.SetRawBytes(RawBytes); if (HasMax()) { ScalarToConstant(*Max, *meta.MutableMaxValue()); ScalarToConstant(*Max, *meta.MutableMinValue()); @@ -40,19 +44,47 @@ NKikimrTxColumnShard::TIndexColumnMeta TChunkMeta::SerializeToProto() const { return meta; } -TColumnRecord::TColumnRecord(const TColumnChunkLoadContext& loadContext, const TIndexInfo& info) - : Meta(loadContext, info) +TColumnRecord::TColumnRecord(const TBlobRangeLink16::TLinkId blobLinkId, const TColumnChunkLoadContext& loadContext, const TSimpleColumnInfo& columnInfo) + : Meta(loadContext, columnInfo) , ColumnId(loadContext.GetAddress().GetColumnId()) , Chunk(loadContext.GetAddress().GetChunk()) - , BlobRange(loadContext.GetBlobRange()) + , BlobRange(loadContext.GetBlobRange().BuildLink(blobLinkId)) { } -TColumnRecord::TColumnRecord(const TChunkAddress& address, const std::shared_ptr& column, const TIndexInfo& info) - : Meta(column, address.GetColumnId(), info) +TColumnRecord::TColumnRecord(const TChunkAddress& address, const std::shared_ptr& column, const TSimpleColumnInfo& columnInfo) + : Meta(column, columnInfo) , ColumnId(address.GetColumnId()) , Chunk(address.GetChunk()) { } +NKikimrColumnShardDataSharingProto::TColumnRecord TColumnRecord::SerializeToProto() const { + NKikimrColumnShardDataSharingProto::TColumnRecord result; + 
result.SetColumnId(ColumnId); + result.SetChunkIdx(Chunk); + *result.MutableMeta() = Meta.SerializeToProto(); + *result.MutableBlobRange() = BlobRange.SerializeToProto(); + return result; +} + +NKikimr::TConclusionStatus TColumnRecord::DeserializeFromProto(const NKikimrColumnShardDataSharingProto::TColumnRecord& proto, const TSimpleColumnInfo& columnInfo) { + ColumnId = proto.GetColumnId(); + Chunk = proto.GetChunkIdx(); + { + auto parse = Meta.DeserializeFromProto(GetAddress(), proto.GetMeta(), columnInfo); + if (!parse) { + return parse; + } + } + { + auto parsed = TBlobRangeLink16::BuildFromProto(proto.GetBlobRange()); + if (!parsed) { + return parsed; + } + BlobRange = parsed.DetachResult(); + } + return TConclusionStatus::Success(); +} + } diff --git a/ydb/core/tx/columnshard/engines/portions/column_record.h b/ydb/core/tx/columnshard/engines/portions/column_record.h index 219e6f8b5316..9d958055cbf9 100644 --- a/ydb/core/tx/columnshard/engines/portions/column_record.h +++ b/ydb/core/tx/columnshard/engines/portions/column_record.h @@ -1,49 +1,38 @@ #pragma once #include "common.h" + #include + #include #include #include #include +#include + #include -#include + #include #include #include +#include + +namespace NKikimrColumnShardDataSharingProto { +class TColumnRecord; +} + namespace NKikimr::NOlap { class TColumnChunkLoadContext; struct TIndexInfo; - -class TIndexChunk { -private: - YDB_READONLY(ui32, IndexId, 0); - YDB_READONLY(ui32, ChunkIdx, 0); - YDB_READONLY(ui32, RecordsCount, 0); - YDB_READONLY(ui32, RawBytes, 0); - YDB_READONLY_DEF(TBlobRange, BlobRange); - -public: - TIndexChunk(const ui32 indexId, const ui32 chunkIdx, const ui32 recordsCount, const ui64 rawBytes, const TBlobRange& blobRange) - : IndexId(indexId) - , ChunkIdx(chunkIdx) - , RecordsCount(recordsCount) - , RawBytes(rawBytes) - , BlobRange(blobRange) { - - } - - void RegisterBlobId(const TUnifiedBlobId& blobId) { -// AFL_VERIFY(!BlobRange.BlobId.GetTabletId())("original", 
BlobRange.BlobId.ToStringNew())("new", blobId.ToStringNew()); - BlobRange.BlobId = blobId; - } -}; +class TColumnRecord; struct TChunkMeta: public TSimpleChunkMeta { private: using TBase = TSimpleChunkMeta; TChunkMeta() = default; + [[nodiscard]] TConclusionStatus DeserializeFromProto(const TChunkAddress& address, const NKikimrTxColumnShard::TIndexColumnMeta& proto, const TSimpleColumnInfo& columnInfo); + friend class TColumnRecord; public: TChunkMeta(TSimpleChunkMeta&& baseMeta) : TBase(baseMeta) @@ -51,6 +40,15 @@ struct TChunkMeta: public TSimpleChunkMeta { } + [[nodiscard]] static TConclusion BuildFromProto(const TChunkAddress& address, const NKikimrTxColumnShard::TIndexColumnMeta& proto, const TSimpleColumnInfo& columnInfo) { + TChunkMeta result; + auto parse = result.DeserializeFromProto(address, proto, columnInfo); + if (!parse) { + return parse; + } + return result; + } + NKikimrTxColumnShard::TIndexColumnMeta SerializeToProto() const; class TTestInstanceBuilder { @@ -63,12 +61,12 @@ struct TChunkMeta: public TSimpleChunkMeta { } }; - TChunkMeta(const TColumnChunkLoadContext& context, const TIndexInfo& indexInfo); + TChunkMeta(const TColumnChunkLoadContext& context, const TSimpleColumnInfo& columnInfo); - TChunkMeta(const std::shared_ptr& column, const ui32 columnId, const TIndexInfo& indexInfo); + TChunkMeta(const std::shared_ptr& column, const TSimpleColumnInfo& columnInfo); }; -struct TColumnRecord { +class TColumnRecord { private: TChunkMeta Meta; TColumnRecord(TChunkMeta&& meta) @@ -76,18 +74,28 @@ struct TColumnRecord { { } + + TColumnRecord() = default; + TConclusionStatus DeserializeFromProto(const NKikimrColumnShardDataSharingProto::TColumnRecord& proto, const TSimpleColumnInfo& columnInfo); public: ui32 ColumnId = 0; ui16 Chunk = 0; - TBlobRange BlobRange; + TBlobRangeLink16 BlobRange; + ui32 GetEntityId() const { + return ColumnId; + } - void RegisterBlobId(const TUnifiedBlobId& blobId) { + void ResetBlobRange() { + BlobRange = 
TBlobRangeLink16(); + } + + void RegisterBlobIdx(const ui16 blobIdx) { // AFL_VERIFY(!BlobRange.BlobId.GetTabletId())("original", BlobRange.BlobId.ToStringNew())("new", blobId.ToStringNew()); - BlobRange.BlobId = blobId; + BlobRange.BlobIdx = blobIdx; } - TColumnRecord(const TChunkAddress& address, const TBlobRange& range, TChunkMeta&& meta) + TColumnRecord(const TChunkAddress& address, const TBlobRangeLink16& range, TChunkMeta&& meta) : Meta(std::move(meta)) , ColumnId(address.GetColumnId()) , Chunk(address.GetChunk()) @@ -114,6 +122,19 @@ struct TColumnRecord { ui16 GetChunkIdx() const { return Chunk; } + const TBlobRangeLink16& GetBlobRange() const { + return BlobRange; + } + + NKikimrColumnShardDataSharingProto::TColumnRecord SerializeToProto() const; + static TConclusion BuildFromProto(const NKikimrColumnShardDataSharingProto::TColumnRecord& proto, const TSimpleColumnInfo& columnInfo) { + TColumnRecord result; + auto parse = result.DeserializeFromProto(proto, columnInfo); + if (!parse) { + return parse; + } + return result; + } TColumnSerializationStat GetSerializationStat(const std::string& columnName) const { TColumnSerializationStat result(ColumnId, columnName); @@ -122,7 +143,7 @@ struct TColumnRecord { } TSimpleSerializationStat GetSerializationStat() const { - return TSimpleSerializationStat(BlobRange.Size, Meta.GetNumRowsVerified(), Meta.GetRawBytesVerified()); + return TSimpleSerializationStat(BlobRange.Size, Meta.GetNumRows(), Meta.GetRawBytes()); } const TChunkMeta& GetMeta() const { @@ -138,11 +159,7 @@ struct TColumnRecord { } bool Valid() const { - return ColumnId && ValidBlob(); - } - - TString SerializedBlobId() const { - return BlobRange.BlobId.SerializeBinary(); + return ColumnId && BlobRange.IsValid(); } TString DebugString() const { @@ -152,13 +169,9 @@ struct TColumnRecord { ; } - bool ValidBlob() const { - return BlobRange.BlobId.IsValid() && BlobRange.Size; - } - - TColumnRecord(const TChunkAddress& address, const std::shared_ptr& column, 
const TIndexInfo& info); + TColumnRecord(const TChunkAddress& address, const std::shared_ptr& column, const TSimpleColumnInfo& columnInfo); - TColumnRecord(const TColumnChunkLoadContext& loadContext, const TIndexInfo& info); + TColumnRecord(const TBlobRangeLink16::TLinkId blobLinkId, const TColumnChunkLoadContext& loadContext, const TSimpleColumnInfo& columnInfo); friend IOutputStream& operator << (IOutputStream& out, const TColumnRecord& rec) { out << '{'; @@ -179,14 +192,21 @@ class TSimpleOrderedColumnChunk: public IPortionColumnChunk { YDB_READONLY_DEF(TString, Data); protected: virtual TString DoDebugString() const override { - return TStringBuilder() << "column_id=" << GetColumnId() << ";chunk=" << GetChunkIdx() << ";data_size=" << Data.size() << ";"; + TStringBuilder sb; + sb << "column_id=" << GetColumnId() << ";data_size=" << Data.size() << ";"; + if (GetChunkIdxOptional()) { + sb << "chunk=" << GetChunkIdxVerified() << ";"; + } else { + sb << "chunk=NO_INITIALIZED;"; + } + return sb; } virtual const TString& DoGetData() const override { return Data; } virtual ui32 DoGetRecordsCountImpl() const override { - return ColumnRecord.GetMeta().GetNumRowsVerified(); + return ColumnRecord.GetMeta().GetNumRows(); } virtual std::vector> DoInternalSplitImpl(const TColumnSaver& /*saver*/, const std::shared_ptr& /*counters*/, const std::vector& /*splitSizes*/) const override { diff --git a/ydb/core/tx/columnshard/engines/portions/common.h b/ydb/core/tx/columnshard/engines/portions/common.h index 311563918469..1231a1e9f5f3 100644 --- a/ydb/core/tx/columnshard/engines/portions/common.h +++ b/ydb/core/tx/columnshard/engines/portions/common.h @@ -33,5 +33,11 @@ class TChunkAddress { TString DebugString() const; }; - } + +template<> +struct ::THash { + inline ui64 operator()(const NKikimr::NOlap::TChunkAddress& a) const { + return (((ui64)a.GetEntityId()) << 16) + a.GetChunkIdx(); // shift is parenthesized: '+' binds tighter than '<<', so the bare form shifted by (16 + chunk index) + } +}; diff --git a/ydb/core/tx/columnshard/engines/portions/index_chunk.cpp 
b/ydb/core/tx/columnshard/engines/portions/index_chunk.cpp new file mode 100644 index 000000000000..4aeaa20dd20e --- /dev/null +++ b/ydb/core/tx/columnshard/engines/portions/index_chunk.cpp @@ -0,0 +1,39 @@ +#include "index_chunk.h" +#include + +namespace NKikimr::NOlap { + +NKikimr::TConclusionStatus TIndexChunk::DeserializeFromProto(const NKikimrColumnShardDataSharingProto::TIndexChunk& proto) { + IndexId = proto.GetIndexId(); + ChunkIdx = proto.GetChunkIdx(); + { + if (!proto.HasMeta()) { + return TConclusionStatus::Fail("no meta information"); + } + RecordsCount = proto.GetMeta().GetRecordsCount(); + RawBytes = proto.GetMeta().GetRawBytes(); + } + { + auto parsed = TBlobRangeLink16::BuildFromProto(proto.GetBlobRange()); + if (!parsed) { + return parsed; + } + BlobRange = parsed.DetachResult(); + } + return TConclusionStatus::Success(); +} + +NKikimrColumnShardDataSharingProto::TIndexChunk TIndexChunk::SerializeToProto() const { + NKikimrColumnShardDataSharingProto::TIndexChunk result; + result.SetIndexId(IndexId); + result.SetChunkIdx(ChunkIdx); + { + auto* meta = result.MutableMeta(); + meta->SetRecordsCount(RecordsCount); + meta->SetRawBytes(RawBytes); + } + *result.MutableBlobRange() = BlobRange.SerializeToProto(); + return result; +} + +} diff --git a/ydb/core/tx/columnshard/engines/portions/index_chunk.h b/ydb/core/tx/columnshard/engines/portions/index_chunk.h new file mode 100644 index 000000000000..6a71704318ca --- /dev/null +++ b/ydb/core/tx/columnshard/engines/portions/index_chunk.h @@ -0,0 +1,73 @@ +#pragma once + +#include "common.h" + +#include + +#include +#include +#include +#include + +#include + +#include +#include +#include + +#include + +namespace NKikimrColumnShardDataSharingProto { +class TIndexChunk; +} + +namespace NKikimr::NOlap { +struct TIndexInfo; + +class TIndexChunk { +private: + YDB_READONLY(ui32, IndexId, 0); + YDB_READONLY(ui32, ChunkIdx, 0); + YDB_READONLY(ui32, RecordsCount, 0); + YDB_READONLY(ui32, RawBytes, 0); + 
YDB_READONLY_DEF(TBlobRangeLink16, BlobRange); + + TIndexChunk() = default; + TConclusionStatus DeserializeFromProto(const NKikimrColumnShardDataSharingProto::TIndexChunk& proto); +public: + TChunkAddress GetAddress() const { + return TChunkAddress(IndexId, ChunkIdx); + } + + ui32 GetEntityId() const { + return IndexId; + } + + TIndexChunk(const ui32 indexId, const ui32 chunkIdx, const ui32 recordsCount, const ui64 rawBytes, const TBlobRangeLink16& blobRange) + : IndexId(indexId) + , ChunkIdx(chunkIdx) + , RecordsCount(recordsCount) + , RawBytes(rawBytes) + , BlobRange(blobRange) { + + } + + void RegisterBlobIdx(const TBlobRangeLink16::TLinkId blobLinkId) { +// AFL_VERIFY(!BlobRange.BlobId.GetTabletId())("original", BlobRange.BlobId.ToStringNew())("new", blobId.ToStringNew()); + BlobRange.BlobIdx = blobLinkId; + } + + static TConclusion BuildFromProto(const NKikimrColumnShardDataSharingProto::TIndexChunk& proto) { + TIndexChunk result; + auto parse = result.DeserializeFromProto(proto); + if (!parse) { + return parse; + } + return result; + } + + NKikimrColumnShardDataSharingProto::TIndexChunk SerializeToProto() const; + +}; + +} diff --git a/ydb/core/tx/columnshard/engines/portions/meta.cpp b/ydb/core/tx/columnshard/engines/portions/meta.cpp index fde6d7135c35..282211dd5cda 100644 --- a/ydb/core/tx/columnshard/engines/portions/meta.cpp +++ b/ydb/core/tx/columnshard/engines/portions/meta.cpp @@ -35,6 +35,15 @@ bool TPortionMeta::DeserializeFromProto(const NKikimrTxColumnShard::TIndexPortio AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("event", "DeserializeFromProto")("error", "parsing duplication"); return true; } + FirstPkColumn = indexInfo.GetPKFirstColumnId(); + { + auto parsed = NStatistics::TPortionStorage::BuildFromProto(portionMeta.GetStatisticsStorage()); + if (!parsed) { + AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("event", "DeserializeFromProto")("error", parsed.GetErrorMessage()); + return false; + } + StatisticsStorage = parsed.DetachResult(); + } TierName 
= portionMeta.GetTierName(); if (portionMeta.GetIsInserted()) { Produced = TPortionMeta::EProduced::INSERTED; @@ -68,14 +77,10 @@ bool TPortionMeta::DeserializeFromProto(const NKikimrTxColumnShard::TIndexPortio return true; } -std::optional TPortionMeta::SerializeToProto(const ui32 columnId, const ui32 chunk) const { - if (!IsChunkWithPortionInfo(columnId, chunk)) { - return {}; - } - +NKikimrTxColumnShard::TIndexPortionMeta TPortionMeta::SerializeToProto() const { NKikimrTxColumnShard::TIndexPortionMeta portionMeta; portionMeta.SetTierName(TierName); - + *portionMeta.MutableStatisticsStorage() = StatisticsStorage.SerializeToProto(); switch (Produced) { case TPortionMeta::EProduced::UNSPECIFIED: Y_ABORT_UNLESS(false); @@ -112,6 +117,14 @@ std::optional TPortionMeta::SerializeTo return portionMeta; } +std::optional TPortionMeta::SerializeToProto(const ui32 columnId, const ui32 chunk) const { + if (!IsChunkWithPortionInfo(columnId, chunk)) { + return {}; + } + + return SerializeToProto(); +} + TString TPortionMeta::DebugString() const { TStringBuilder sb; sb << "(produced=" << Produced << ";"; diff --git a/ydb/core/tx/columnshard/engines/portions/meta.h b/ydb/core/tx/columnshard/engines/portions/meta.h index 005021f7c457..8a715bca23d9 100644 --- a/ydb/core/tx/columnshard/engines/portions/meta.h +++ b/ydb/core/tx/columnshard/engines/portions/meta.h @@ -1,6 +1,7 @@ #pragma once #include #include +#include #include #include #include @@ -15,6 +16,7 @@ struct TPortionMeta { private: std::shared_ptr ReplaceKeyEdges; // first and last PK rows YDB_ACCESSOR_DEF(TString, TierName); + YDB_READONLY_DEF(NStatistics::TPortionStorage, StatisticsStorage); public: using EProduced = NPortion::EProduced; @@ -26,6 +28,19 @@ struct TPortionMeta { EProduced Produced{EProduced::UNSPECIFIED}; ui32 FirstPkColumn = 0; + ui64 GetMetadataMemorySize() const { + return sizeof(TPortionMeta) + ReplaceKeyEdges->GetMemorySize(); + } + + void SetStatisticsStorage(NStatistics::TPortionStorage&& 
storage) { + AFL_VERIFY(StatisticsStorage.IsEmpty()); + StatisticsStorage = std::move(storage); + } + + void ResetStatisticsStorage(NStatistics::TPortionStorage&& storage) { + StatisticsStorage = std::move(storage); + } + bool IsChunkWithPortionInfo(const ui32 columnId, const ui32 chunkIdx) const { return columnId == FirstPkColumn && chunkIdx == 0; } @@ -33,6 +48,7 @@ struct TPortionMeta { bool DeserializeFromProto(const NKikimrTxColumnShard::TIndexPortionMeta& portionMeta, const TIndexInfo& indexInfo); std::optional SerializeToProto(const ui32 columnId, const ui32 chunk) const; + NKikimrTxColumnShard::TIndexPortionMeta SerializeToProto() const; void FillBatchInfo(const NArrow::TFirstLastSpecialKeys& primaryKeys, const NArrow::TMinMaxSpecialKeys& snapshotKeys, const TIndexInfo& indexInfo); diff --git a/ydb/core/tx/columnshard/engines/portions/portion_info.cpp b/ydb/core/tx/columnshard/engines/portions/portion_info.cpp index f21d12eaab75..c67ea7f67ec7 100644 --- a/ydb/core/tx/columnshard/engines/portions/portion_info.cpp +++ b/ydb/core/tx/columnshard/engines/portions/portion_info.cpp @@ -1,11 +1,17 @@ #include "portion_info.h" -#include +#include +#include +#include #include +#include +#include +#include #include -#include #include #include +#include + namespace NKikimr::NOlap { const TColumnRecord& TPortionInfo::AppendOneChunkColumn(TColumnRecord&& record) { @@ -58,60 +64,38 @@ std::shared_ptr TPortionInfo::MaxValue(ui32 columnId) const { return result; } -TPortionInfo TPortionInfo::CopyWithFilteredColumns(const THashSet& columnIds) const { - TPortionInfo result(PathId, Portion, GetMinSnapshot(), BlobsOperator); - result.Meta = Meta; - result.Records.reserve(columnIds.size()); - - for (auto& rec : Records) { - Y_ABORT_UNLESS(rec.Valid()); - if (columnIds.contains(rec.ColumnId)) { - result.Records.push_back(rec); - } - } - return result; +ui64 TPortionInfo::GetColumnRawBytes(const std::vector& columnIds, const bool validation) const { + return 
GetColumnRawBytes(std::set(columnIds.begin(), columnIds.end()), validation); } -ui64 TPortionInfo::GetRawBytes(const std::vector& columnIds) const { +ui64 TPortionInfo::GetColumnRawBytes(const std::optional>& entityIds, const bool validation) const { ui64 sum = 0; - const ui32 numRows = NumRows(); - for (auto&& i : columnIds) { - if (TIndexInfo::IsSpecialColumn(i)) { - sum += numRows * TIndexInfo::GetSpecialColumnByteWidth(i); - } else { - for (auto&& r : Records) { - if (r.ColumnId == i) { - sum += r.GetMeta().GetRawBytesVerified(); - } - } - } - } + const auto aggr = [&](const TColumnRecord& r) { + sum += r.GetMeta().GetRawBytes(); + }; + AggregateIndexChunksData(aggr, Records, entityIds, validation); return sum; } -ui64 TPortionInfo::GetRawBytes(const std::set& entityIds) const { +ui64 TPortionInfo::GetColumnBlobBytes(const std::optional>& entityIds, const bool validation) const { ui64 sum = 0; - const ui32 numRows = NumRows(); - for (auto&& i : TIndexInfo::GetSpecialColumnIds()) { - if (entityIds.contains(i)) { - sum += numRows * TIndexInfo::GetSpecialColumnByteWidth(i); - } - } - for (auto&& r : Records) { - if (entityIds.contains(r.ColumnId)) { - sum += r.GetMeta().GetRawBytesVerified(); - } - } + const auto aggr = [&](const TColumnRecord& r) { + sum += r.GetBlobRange().GetSize(); + }; + AggregateIndexChunksData(aggr, Records, entityIds, validation); return sum; } -ui64 TPortionInfo::GetIndexBytes(const std::set& entityIds) const { +ui64 TPortionInfo::GetColumnBlobBytes(const std::vector& columnIds, const bool validation) const { + return GetColumnBlobBytes(std::set(columnIds.begin(), columnIds.end()), validation); +} + +ui64 TPortionInfo::GetIndexRawBytes(const std::optional>& entityIds, const bool validation) const { ui64 sum = 0; - for (auto&& r : Indexes) { - if (entityIds.contains(r.GetIndexId())) { - sum += r.GetBlobRange().Size; - } - } + const auto aggr = [&](const TIndexChunk& r) { + sum += r.GetRawBytes(); + }; + AggregateIndexChunksData(aggr, 
Indexes, entityIds, validation); return sum; } @@ -119,7 +103,8 @@ TString TPortionInfo::DebugString(const bool withDetails) const { TStringBuilder sb; sb << "(portion_id:" << Portion << ";" << "path_id:" << PathId << ";records_count:" << NumRows() << ";" - "min_schema_snapshot:(" << MinSnapshot.DebugString() << ");"; + "min_schema_snapshot:(" << MinSnapshotDeprecated.DebugString() << ");" + "schema_version:" << SchemaVersion.value_or(0) << ";"; if (withDetails) { sb << "records_snapshot_min:(" << RecordSnapshotMin().DebugString() << ");" << @@ -128,21 +113,20 @@ TString TPortionInfo::DebugString(const bool withDetails) const { "to:" << IndexKeyEnd().DebugString() << ";"; } sb << - "size:" << BlobsBytes() << ";" << + "column_size:" << GetColumnBlobBytes() << ";" << + "index_size:" << GetIndexBlobBytes() << ";" << "meta:(" << Meta.DebugString() << ");"; if (RemoveSnapshot.Valid()) { sb << "remove_snapshot:(" << RemoveSnapshot.DebugString() << ");"; } - if (BlobsOperator) { - sb << "blobs_operator:" << BlobsOperator->DebugString() << ";"; - } sb << "chunks:(" << Records.size() << ");"; if (IS_TRACE_LOG_ENABLED(NKikimrServices::TX_COLUMNSHARD)) { - std::set blobIds; + std::vector blobRanges; for (auto&& i : Records) { - blobIds.emplace(::ToString(i.BlobRange.BlobId)); + blobRanges.emplace_back(i.BlobRange); } - sb << "blobs:" << JoinSeq(",", blobIds) << ";blobs_count:" << blobIds.size() << ";"; + sb << "blobs:" << JoinSeq(",", blobRanges) << ";ranges_count:" << blobRanges.size() << ";"; + sb << "blob_ids:" << JoinSeq(",", BlobIds) << ";blobs_count:" << BlobIds.size() << ";"; } return sb << ")"; } @@ -161,23 +145,15 @@ std::vector TPortionInfo::GetColumnChunksP for (auto&& c : Records) { if (c.ColumnId == columnId) { Y_ABORT_UNLESS(c.Chunk == result.size()); - Y_ABORT_UNLESS(c.GetMeta().GetNumRowsVerified()); + Y_ABORT_UNLESS(c.GetMeta().GetNumRows()); result.emplace_back(&c); } } return result; } -size_t TPortionInfo::NumBlobs() const { - THashSet blobIds; - for 
(auto&& i : Records) { - blobIds.emplace(i.BlobRange.BlobId); - } - return blobIds.size(); -} - bool TPortionInfo::IsEqualWithSnapshots(const TPortionInfo& item) const { - return PathId == item.PathId && MinSnapshot == item.MinSnapshot + return PathId == item.PathId && MinSnapshotDeprecated == item.MinSnapshotDeprecated && Portion == item.Portion && RemoveSnapshot == item.RemoveSnapshot; } @@ -191,6 +167,7 @@ void TPortionInfo::RemoveFromDatabase(IDbWrapper& db) const { } void TPortionInfo::SaveToDatabase(IDbWrapper& db) const { + FullValidation(); for (auto& record : Records) { db.WriteColumn(*this, record); } @@ -226,9 +203,9 @@ std::vector TPortionInfo::BuildPages() cons currentSize = 0; currentId = i.GetColumnId(); } - currentSize += i.GetMeta().GetNumRowsVerified(); + currentSize += i.GetMeta().GetNumRows(); ++currentCursor[currentSize]; - entities[i.GetColumnId()].emplace_back(&i, i.GetMeta().GetNumRowsVerified()); + entities[i.GetColumnId()].emplace_back(&i, i.GetMeta().GetNumRows()); } for (auto&& i : Indexes) { if (currentId != i.GetIndexId()) { @@ -270,10 +247,489 @@ std::vector TPortionInfo::BuildPages() cons return pages; } +ui64 TPortionInfo::GetMetadataMemorySize() const { + return sizeof(TPortionInfo) + Records.size() * (sizeof(TColumnRecord) + 8) + Indexes.size() * sizeof(TIndexChunk) + BlobIds.size() * sizeof(TUnifiedBlobId) + - sizeof(TPortionMeta) + Meta.GetMetadataMemorySize(); +} + ui64 TPortionInfo::GetTxVolume() const { return 1024 + Records.size() * 256 + Indexes.size() * 256; } +void TPortionInfo::SerializeToProto(NKikimrColumnShardDataSharingProto::TPortionInfo& proto) const { + proto.SetPathId(PathId); + proto.SetPortionId(Portion); + proto.SetSchemaVersion(GetSchemaVersionVerified()); + *proto.MutableMinSnapshotDeprecated() = MinSnapshotDeprecated.SerializeToProto(); + if (!RemoveSnapshot.IsZero()) { + *proto.MutableRemoveSnapshot() = RemoveSnapshot.SerializeToProto(); + } + for (auto&& i : BlobIds) { + *proto.AddBlobIds() = 
i.SerializeToProto(); + } + + *proto.MutableMeta() = Meta.SerializeToProto(); + + for (auto&& r : Records) { + *proto.AddRecords() = r.SerializeToProto(); + } + + for (auto&& r : Indexes) { + *proto.AddIndexes() = r.SerializeToProto(); + } +} + +TConclusionStatus TPortionInfo::DeserializeFromProto(const NKikimrColumnShardDataSharingProto::TPortionInfo& proto, const TIndexInfo& info) { + PathId = proto.GetPathId(); + Portion = proto.GetPortionId(); + SchemaVersion = proto.GetSchemaVersion(); + for (auto&& i : proto.GetBlobIds()) { + auto blobId = TUnifiedBlobId::BuildFromProto(i); + if (!blobId) { + return blobId; + } + BlobIds.emplace_back(blobId.DetachResult()); + } + { + auto parse = MinSnapshotDeprecated.DeserializeFromProto(proto.GetMinSnapshotDeprecated()); + if (!parse) { + return parse; + } + } + if (proto.HasRemoveSnapshot()) { + auto parse = RemoveSnapshot.DeserializeFromProto(proto.GetRemoveSnapshot()); + if (!parse) { + return parse; + } + } + if (!Meta.DeserializeFromProto(proto.GetMeta(), info)) { + return TConclusionStatus::Fail("cannot parse meta"); + } + for (auto&& i : proto.GetRecords()) { + auto parse = TColumnRecord::BuildFromProto(i, info.GetColumnFeaturesVerified(i.GetColumnId())); + if (!parse) { + return parse; + } + Records.emplace_back(std::move(parse.DetachResult())); + } + for (auto&& i : proto.GetIndexes()) { + auto parse = TIndexChunk::BuildFromProto(i); + if (!parse) { + return parse; + } + Indexes.emplace_back(std::move(parse.DetachResult())); + } + return TConclusionStatus::Success(); +} + +TConclusion TPortionInfo::BuildFromProto(const NKikimrColumnShardDataSharingProto::TPortionInfo& proto, const TIndexInfo& info) { + TPortionInfo result; + auto parse = result.DeserializeFromProto(proto, info); + if (!parse) { + return parse; + } + return result; +} + +THashMap TPortionInfo::DecodeBlobAddresses(NBlobOperations::NRead::TCompositeReadBlobs&& blobs, const TIndexInfo& indexInfo) const { + THashMap result; + for (auto&& i : blobs) { + 
for (auto&& b : i.second) { + bool found = false; + TString columnStorageId; + ui32 columnId = 0; + for (auto&& record : Records) { + if (RestoreBlobRange(record.GetBlobRange()) == b.first) { + if (columnId != record.GetColumnId()) { + columnStorageId = GetColumnStorageId(record.GetColumnId(), indexInfo); + } + if (columnStorageId != i.first) { + continue; + } + result.emplace(record.GetAddress(), std::move(b.second)); + found = true; + break; + } + } + if (found) { + continue; + } + for (auto&& record : Indexes) { + if (RestoreBlobRange(record.GetBlobRange()) == b.first) { + if (columnId != record.GetIndexId()) { + columnStorageId = indexInfo.GetIndexStorageId(record.GetIndexId()); + } + if (columnStorageId != i.first) { + continue; + } + result.emplace(record.GetAddress(), std::move(b.second)); + found = true; + break; + } + } + AFL_VERIFY(found)("blobs", blobs.DebugString())("records", DebugString(true))("problem", b.first); + } + } + return result; +} + +const TString& TPortionInfo::GetColumnStorageId(const ui32 columnId, const TIndexInfo& indexInfo) const { + return indexInfo.GetColumnStorageId(columnId, GetMeta().GetTierName()); +} + +const TString& TPortionInfo::GetEntityStorageId(const ui32 columnId, const TIndexInfo& indexInfo) const { + return indexInfo.GetEntityStorageId(columnId, GetMeta().GetTierName()); +} + +ISnapshotSchema::TPtr TPortionInfo::GetSchema(const TVersionedIndex& index) const { + if (SchemaVersion) { + auto schema = index.GetSchema(SchemaVersion.value()); + AFL_VERIFY(!!schema)("details", TStringBuilder() << "cannot find schema for version " << SchemaVersion.value()); + return schema; + } + return index.GetSchema(MinSnapshotDeprecated); +} + +void TPortionInfo::FillBlobRangesByStorage(THashMap>& result, const TIndexInfo& indexInfo) const { + for (auto&& i : Records) { + const TString& storageId = GetColumnStorageId(i.GetColumnId(), indexInfo); + AFL_VERIFY(result[storageId].emplace(RestoreBlobRange(i.GetBlobRange())).second)("blob_id", 
RestoreBlobRange(i.GetBlobRange()).ToString()); + } + for (auto&& i : Indexes) { + const TString& storageId = indexInfo.GetIndexStorageId(i.GetIndexId()); + AFL_VERIFY(result[storageId].emplace(RestoreBlobRange(i.GetBlobRange())).second)("blob_id", RestoreBlobRange(i.GetBlobRange()).ToString()); + } +} + +void TPortionInfo::FillBlobRangesByStorage(THashMap>& result, const TVersionedIndex& index) const { + auto schema = GetSchema(index); + return FillBlobRangesByStorage(result, schema->GetIndexInfo()); +} + +void TPortionInfo::FillBlobIdsByStorage(THashMap>& result, const TIndexInfo& indexInfo) const { + THashMap> local; + THashSet* currentHashLocal; + THashSet* currentHashResult; + ui32 lastEntityId = 0; + TString lastStorageId; + ui32 lastBlobIdx = BlobIds.size(); + for (auto&& i : Records) { + if (lastEntityId != i.GetEntityId()) { + const TString& storageId = GetColumnStorageId(i.GetEntityId(), indexInfo); + if (storageId != lastStorageId) { + currentHashResult = &result[storageId]; + currentHashLocal = &local[storageId]; + lastStorageId = storageId; + lastBlobIdx = BlobIds.size(); + } + } + if (lastBlobIdx != i.GetBlobRange().GetBlobIdxVerified() && currentHashLocal->emplace(i.GetBlobRange().GetBlobIdxVerified()).second) { + auto blobId = GetBlobId(i.GetBlobRange().GetBlobIdxVerified()); + AFL_VERIFY(currentHashResult->emplace(blobId).second)("blob_id", blobId.ToStringNew()); + lastBlobIdx = i.GetBlobRange().GetBlobIdxVerified(); + } + } + for (auto&& i : Indexes) { + if (lastEntityId != i.GetEntityId()) { + const TString& storageId = indexInfo.GetIndexStorageId(i.GetEntityId()); + if (storageId != lastStorageId) { + currentHashResult = &result[storageId]; + currentHashLocal = &local[storageId]; + lastStorageId = storageId; + lastBlobIdx = BlobIds.size(); + } + } + if (lastBlobIdx != i.GetBlobRange().GetBlobIdxVerified() && currentHashLocal->emplace(i.GetBlobRange().GetBlobIdxVerified()).second) { + auto blobId = 
GetBlobId(i.GetBlobRange().GetBlobIdxVerified()); + AFL_VERIFY(currentHashResult->emplace(blobId).second)("blob_id", blobId.ToStringNew()); + lastBlobIdx = i.GetBlobRange().GetBlobIdxVerified(); + } + } +} + +void TPortionInfo::FillBlobIdsByStorage(THashMap>& result, const TVersionedIndex& index) const { + auto schema = GetSchema(index); + return FillBlobIdsByStorage(result, schema->GetIndexInfo()); +} + +TBlobRangeLink16::TLinkId TPortionInfo::RegisterBlobId(const TUnifiedBlobId& blobId) { + AFL_VERIFY(blobId.IsValid()); + TBlobRangeLink16::TLinkId idx = 0; + for (auto&& i : BlobIds) { + if (i == blobId) { + return idx; + } + ++idx; + } + BlobIds.emplace_back(blobId); + return idx; +} + +THashMap>>> TPortionInfo::RestoreEntityChunks(NBlobOperations::NRead::TCompositeReadBlobs& blobs, const TIndexInfo& indexInfo) const { + THashMap>>> result; + for (auto&& c : GetRecords()) { + const TString& storageId = GetColumnStorageId(c.GetColumnId(), indexInfo); + auto& storageRecords = result[storageId]; + auto& blobRecords = storageRecords[GetBlobId(c.GetBlobRange().GetBlobIdxVerified())]; + blobRecords.emplace_back(std::make_shared(blobs.Extract(storageId, RestoreBlobRange(c.GetBlobRange())), c, indexInfo.GetColumnFeaturesVerified(c.GetColumnId()))); + blobRecords.back()->SetChunkIdx(c.GetChunkIdx()); + } + for (auto&& c : GetIndexes()) { + const TString& storageId = indexInfo.GetIndexStorageId(c.GetIndexId()); + auto& storageRecords = result[storageId]; + auto& blobRecords = storageRecords[GetBlobId(c.GetBlobRange().GetBlobIdxVerified())]; + blobRecords.emplace_back(std::make_shared(c.GetAddress(), c.GetRecordsCount(), c.GetRawBytes(), blobs.Extract(storageId, RestoreBlobRange(c.GetBlobRange())))); + blobRecords.back()->SetChunkIdx(c.GetChunkIdx()); + } + return result; +} + +THashMap>> TPortionInfo::GetEntityChunks(const TIndexInfo& indexInfo) const { + THashMap>> result; + for (auto&& c : GetRecords()) { + const TString& storageId = GetColumnStorageId(c.GetColumnId(), 
indexInfo); + auto& storageRecords = result[storageId]; + auto& blobRecords = storageRecords[GetBlobId(c.GetBlobRange().GetBlobIdxVerified())]; + blobRecords.emplace_back(TEntityChunk(c.GetAddress(), c.GetMeta().GetNumRows(), c.GetMeta().GetRawBytes(), c.GetBlobRange())); + } + for (auto&& c : GetIndexes()) { + const TString& storageId = indexInfo.GetIndexStorageId(c.GetIndexId()); + auto& storageRecords = result[storageId]; + auto& blobRecords = storageRecords[GetBlobId(c.GetBlobRange().GetBlobIdxVerified())]; + blobRecords.emplace_back(TEntityChunk(c.GetAddress(), c.GetRecordsCount(), c.GetRawBytes(), c.GetBlobRange())); + } + return result; +} + +void TPortionInfo::ReorderChunks() { + { + auto pred = [](const TColumnRecord& l, const TColumnRecord& r) { + return l.GetAddress() < r.GetAddress(); + }; + std::sort(Records.begin(), Records.end(), pred); + std::optional chunk; + for (auto&& i : Records) { + if (!chunk) { + chunk = i.GetAddress(); + } else { + AFL_VERIFY(*chunk < i.GetAddress()); + chunk = i.GetAddress(); + } + AFL_VERIFY(chunk->GetEntityId()); + } + } + { + auto pred = [](const TIndexChunk& l, const TIndexChunk& r) { + return l.GetAddress() < r.GetAddress(); + }; + std::sort(Indexes.begin(), Indexes.end(), pred); + std::optional chunk; + for (auto&& i : Indexes) { + if (!chunk) { + chunk = i.GetAddress(); + } else { + AFL_VERIFY(*chunk < i.GetAddress()); + chunk = i.GetAddress(); + } + AFL_VERIFY(chunk->GetEntityId()); + } + } +} + +void TPortionInfo::FullValidation() const { + AFL_VERIFY(PathId); + AFL_VERIFY(Portion); + AFL_VERIFY(MinSnapshotDeprecated.Valid()); + std::set blobIdxs; + for (auto&& i : Records) { + blobIdxs.emplace(i.GetBlobRange().GetBlobIdxVerified()); + } + for (auto&& i : Indexes) { + blobIdxs.emplace(i.GetBlobRange().GetBlobIdxVerified()); + } + if (BlobIds.size()) { + AFL_VERIFY(BlobIds.size() == blobIdxs.size()); + AFL_VERIFY(BlobIds.size() == *blobIdxs.rbegin() + 1); + } else { + AFL_VERIFY(blobIdxs.empty()); + } +} + +ui64 
TPortionInfo::GetMinMemoryForReadColumns(const std::optional>& columnIds) const { + ui32 columnId = 0; + ui32 chunkIdx = 0; + + struct TDelta { + i64 BlobBytes = 0; + i64 RawBytes = 0; + void operator+=(const TDelta& add) { + BlobBytes += add.BlobBytes; + RawBytes += add.RawBytes; + } + }; + + std::map diffByPositions; + ui64 position = 0; + ui64 RawBytesCurrent = 0; + ui64 BlobBytesCurrent = 0; + std::optional recordsCount; + + const auto doFlushColumn = [&]() { + if (!recordsCount && position) { + recordsCount = position; + } else { + AFL_VERIFY(*recordsCount == position); + } + if (position) { + TDelta delta; + delta.RawBytes = -1 * RawBytesCurrent; + delta.BlobBytes = -1 * BlobBytesCurrent; + diffByPositions[position] += delta; + } + position = 0; + chunkIdx = 0; + RawBytesCurrent = 0; + BlobBytesCurrent = 0; + }; + + for (auto&& i : Records) { + if (columnIds && !columnIds->contains(i.GetColumnId())) { + continue; + } + if (columnId != i.GetColumnId()) { + if (columnId) { + doFlushColumn(); + } + AFL_VERIFY(i.GetColumnId() > columnId); + AFL_VERIFY(i.GetChunkIdx() == 0); + columnId = i.GetColumnId(); + } else { + AFL_VERIFY(i.GetChunkIdx() == chunkIdx + 1); + } + chunkIdx = i.GetChunkIdx(); + TDelta delta; + delta.RawBytes = -1 * RawBytesCurrent + i.GetMeta().GetRawBytes(); + delta.BlobBytes = -1 * BlobBytesCurrent + i.GetBlobRange().Size; + diffByPositions[position] += delta; + position += i.GetMeta().GetNumRows(); + RawBytesCurrent = i.GetMeta().GetRawBytes(); + BlobBytesCurrent = i.GetBlobRange().Size; + } + if (columnId) { + doFlushColumn(); + } + i64 maxRawBytes = 0; + TDelta current; + for (auto&& i : diffByPositions) { + current += i.second; + AFL_VERIFY(current.BlobBytes >= 0); + AFL_VERIFY(current.RawBytes >= 0); + if (maxRawBytes < current.RawBytes) { + maxRawBytes = current.RawBytes; + } + } + AFL_VERIFY(current.BlobBytes == 0)("real", current.BlobBytes); + AFL_VERIFY(current.RawBytes == 0)("real", current.RawBytes); + return maxRawBytes; +} + 
+namespace { +template +TPortionInfo::TPreparedBatchData PrepareForAssembleImpl(const TPortionInfo& portion, const ISnapshotSchema& dataSchema, const ISnapshotSchema& resultSchema, + THashMap& blobsData) { + std::vector columns; + auto arrowResultSchema = resultSchema.GetSchema(); + columns.reserve(arrowResultSchema->num_fields()); + const ui32 rowsCount = portion.GetRecordsCount(); + for (auto&& i : arrowResultSchema->fields()) { + columns.emplace_back(rowsCount, dataSchema.GetColumnLoaderOptional(i->name()), resultSchema.GetColumnLoaderOptional(i->name())); + } + { + int skipColumnId = -1; + TPortionInfo::TColumnAssemblingInfo* currentAssembler = nullptr; + for (auto& rec : portion.GetRecords()) { + if (skipColumnId == (int)rec.ColumnId) { + continue; + } + if (!currentAssembler || rec.ColumnId != currentAssembler->GetColumnId()) { + const i32 resultPos = resultSchema.GetFieldIndex(rec.ColumnId); + if (resultPos < 0) { + skipColumnId = rec.ColumnId; + continue; + } + AFL_VERIFY((ui32)resultPos < columns.size()); + currentAssembler = &columns[resultPos]; + } + auto it = blobsData.find(rec.GetAddress()); + AFL_VERIFY(it != blobsData.end())("size", blobsData.size())("address", rec.GetAddress().DebugString()); + currentAssembler->AddBlobInfo(rec.Chunk, rec.GetMeta().GetNumRows(), std::move(it->second)); + blobsData.erase(it); + } + } + + // Make chunked arrays for columns + std::vector preparedColumns; + preparedColumns.reserve(columns.size()); + for (auto& c : columns) { + preparedColumns.emplace_back(c.Compile()); + } + + return TPortionInfo::TPreparedBatchData(std::move(preparedColumns), arrowResultSchema, rowsCount); +} + +} + +namespace { +class TChunkAccessor { +private: + const std::vector& Chunks; + const std::shared_ptr Loader; +public: + TChunkAccessor(const std::vector& chunks, const std::shared_ptr& loader) + : Chunks(chunks) + , Loader(loader) + { + + } + ui64 GetChunksCount() const { + return Chunks.size(); + } + ui64 GetChunkLength(const ui32 idx) 
const { + return Chunks[idx].GetRecordsCount(); + } + std::shared_ptr GetArray(const ui32 idx) const { + return Chunks[idx].GetArrayVerified(Loader); + } +}; +} + +NArrow::NAccessor::IChunkedArray::TCurrentChunkAddress TDeserializeChunkedArray::DoGetChunk(const std::optional& chunkCurrent, const ui64 position) const { + TChunkAccessor accessor(Chunks, Loader); + return SelectChunk(chunkCurrent, position, accessor); +} + +TPortionInfo::TPreparedBatchData TPortionInfo::PrepareForAssemble(const ISnapshotSchema& dataSchema, const ISnapshotSchema& resultSchema, THashMap& blobsData) const { + return PrepareForAssembleImpl(*this, dataSchema, resultSchema, blobsData); +} + +TPortionInfo::TPreparedBatchData TPortionInfo::PrepareForAssemble(const ISnapshotSchema& dataSchema, const ISnapshotSchema& resultSchema, THashMap& blobsData) const { + return PrepareForAssembleImpl(*this, dataSchema, resultSchema, blobsData); +} + +std::shared_ptr TPortionInfo::TPreparedColumn::AssembleForSeqAccess() const { + Y_ABORT_UNLESS(!Blobs.empty()); + + std::vector chunks; + chunks.reserve(Blobs.size()); + ui64 recordsCount = 0; + for (auto& blob : Blobs) { + chunks.push_back(blob.BuildDeserializeChunk(Loader)); + recordsCount += blob.GetExpectedRowsCountVerified(); + } + + return std::make_shared(recordsCount, Loader, std::move(chunks)); +} + std::shared_ptr TPortionInfo::TPreparedColumn::Assemble() const { Y_ABORT_UNLESS(!Blobs.empty()); @@ -289,6 +745,18 @@ std::shared_ptr TPortionInfo::TPreparedColumn::Assemble() c return (*res)->column(0); } +TDeserializeChunkedArray::TChunk TPortionInfo::TAssembleBlobInfo::BuildDeserializeChunk(const std::shared_ptr& loader) const { + if (NullRowsCount) { + Y_ABORT_UNLESS(!Data); + auto emptyBatch = NArrow::MakeEmptyBatch(loader->GetExpectedSchema(), NullRowsCount); + AFL_VERIFY(emptyBatch->num_columns() == 1); + return TDeserializeChunkedArray::TChunk(emptyBatch->column(0)); + } else { + AFL_VERIFY(ExpectedRowsCount); + return 
TDeserializeChunkedArray::TChunk(*ExpectedRowsCount, Data); + } +} + std::shared_ptr TPortionInfo::TAssembleBlobInfo::BuildRecordBatch(const TColumnLoader& loader) const { if (NullRowsCount) { Y_ABORT_UNLESS(!Data); @@ -299,10 +767,24 @@ std::shared_ptr TPortionInfo::TAssembleBlobInfo::BuildRecord AFL_ERROR(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "cannot unpack batch")("error", result.status().ToString())("loader", loader.DebugString()); return nullptr; } + if (ExpectedRowsCount) { + AFL_VERIFY((*result)->num_rows() == ExpectedRowsCount)("real", (*result)->num_rows())("expected", ExpectedRowsCount); + } return *result; } } +std::shared_ptr TPortionInfo::TPreparedBatchData::AssembleForSeqAccess() const { + std::vector> columns; + std::vector> fields; + for (auto&& i : Columns) { + columns.emplace_back(i.AssembleForSeqAccess()); + fields.emplace_back(i.GetField()); + } + + return std::make_shared(std::make_shared(fields), std::move(columns)); +} + std::shared_ptr TPortionInfo::TPreparedBatchData::AssembleTable(const TAssembleOptions& options) const { std::vector> columns; std::vector> fields; @@ -330,10 +812,7 @@ std::shared_ptr TPortionInfo::TPreparedBatchData::AssembleTable(co } std::shared_ptr TPortionInfo::TPreparedBatchData::Assemble(const TAssembleOptions& options) const { - auto table = AssembleTable(options); - auto res = table->CombineChunks(); - Y_ABORT_UNLESS(res.ok()); - return NArrow::ToBatch(*res); + return NArrow::ToBatch(AssembleTable(options), true); } } diff --git a/ydb/core/tx/columnshard/engines/portions/portion_info.h b/ydb/core/tx/columnshard/engines/portions/portion_info.h index 30b96733868d..ee3793c8e8f5 100644 --- a/ydb/core/tx/columnshard/engines/portions/portion_info.h +++ b/ydb/core/tx/columnshard/engines/portions/portion_info.h @@ -1,8 +1,11 @@ #pragma once #include "column_record.h" +#include "index_chunk.h" #include "meta.h" #include +#include +#include #include #include #include @@ -10,26 +13,232 @@ #include +namespace 
NKikimrColumnShardDataSharingProto { +class TPortionInfo; +} + namespace NKikimr::NOlap { +namespace NBlobOperations::NRead { +class TCompositeReadBlobs; +} + struct TIndexInfo; +class TVersionedIndex; class IDbWrapper; +class TDeserializeChunkedArray: public NArrow::NAccessor::IChunkedArray { +private: + using TBase = NArrow::NAccessor::IChunkedArray; +public: + class TChunk { + private: + YDB_READONLY(ui32, RecordsCount, 0); + std::shared_ptr PredefinedArray; + const TString Data; + public: + TChunk(const std::shared_ptr& predefinedArray) + : PredefinedArray(predefinedArray) { + AFL_VERIFY(PredefinedArray); + RecordsCount = PredefinedArray->length(); + } + + TChunk(const ui32 recordsCount, const TString& data) + : RecordsCount(recordsCount) + , Data(data) { + + } + + std::shared_ptr GetArrayVerified(const std::shared_ptr& loader) const { + if (PredefinedArray) { + return PredefinedArray; + } + auto result = loader->ApplyVerified(Data); + AFL_VERIFY(result); + AFL_VERIFY(result->num_columns() == 1); + AFL_VERIFY(result->num_rows() == RecordsCount)("length", result->num_rows())("records_count", RecordsCount); + return result->column(0); + } + }; + + std::shared_ptr Loader; + std::vector Chunks; +protected: + virtual TCurrentChunkAddress DoGetChunk(const std::optional& chunkCurrent, const ui64 position) const override; + virtual std::shared_ptr DoGetChunkedArray() const override { + AFL_VERIFY(false); + return nullptr; + } +public: + TDeserializeChunkedArray(const ui64 recordsCount, const std::shared_ptr& loader, std::vector&& chunks) + : TBase(recordsCount, NArrow::NAccessor::IChunkedArray::EType::SerializedChunkedArray, loader->GetField()->type()) + , Loader(loader) + , Chunks(std::move(chunks)) { + AFL_VERIFY(Loader); + } +}; + +class TEntityChunk { +private: + TChunkAddress Address; + YDB_READONLY(ui32, RecordsCount, 0); + YDB_READONLY(ui64, RawBytes, 0); + YDB_READONLY_DEF(TBlobRangeLink16, BlobRange); +public: + const TChunkAddress& GetAddress() const { + 
return Address; + } + + TEntityChunk(const TChunkAddress& address, const ui32 recordsCount, const ui64 rawBytesSize, const TBlobRangeLink16& blobRange) + : Address(address) + , RecordsCount(recordsCount) + , RawBytes(rawBytesSize) + , BlobRange(blobRange) + { + + } +}; + class TPortionInfo { +public: + using TRuntimeFeatures = ui8; + enum class ERuntimeFeature: TRuntimeFeatures { + Optimized = 1 /* "optimized" */ + }; private: TPortionInfo() = default; ui64 PathId = 0; ui64 Portion = 0; // Id of independent (overlayed by PK) portion of data in pathId - TSnapshot MinSnapshot = TSnapshot::Zero(); // {PlanStep, TxId} is min snapshot for {Granule, Portion} + TSnapshot MinSnapshotDeprecated = TSnapshot::Zero(); // {PlanStep, TxId} is min snapshot for {Granule, Portion} TSnapshot RemoveSnapshot = TSnapshot::Zero(); // {XPlanStep, XTxId} is snapshot where the blob has been removed (i.e. compacted into another one) + std::optional SchemaVersion; TPortionMeta Meta; - std::shared_ptr BlobsOperator; ui64 DeprecatedGranuleId = 0; YDB_READONLY_DEF(std::vector, Indexes); + YDB_READONLY(TRuntimeFeatures, RuntimeFeatures, 0); + std::vector BlobIds; + TConclusionStatus DeserializeFromProto(const NKikimrColumnShardDataSharingProto::TPortionInfo& proto, const TIndexInfo& info); + template + static void CheckChunksOrder(const std::vector& chunks) { + ui32 entityId = 0; + ui32 chunkIdx = 0; + for (auto&& i : chunks) { + if (entityId != i.GetEntityId()) { + AFL_VERIFY(entityId < i.GetEntityId()); + AFL_VERIFY(i.GetChunkIdx() == 0); + entityId = i.GetEntityId(); + chunkIdx = 0; + } else { + AFL_VERIFY(i.GetChunkIdx() == chunkIdx + 1); + chunkIdx = i.GetChunkIdx(); + } + } + } + + template + static void AggregateIndexChunksData(const TAggregator& aggr, const std::vector& chunks, const std::optional>& columnIds, const bool validation) { + if (columnIds) { + auto itColumn = columnIds->begin(); + auto itRecord = chunks.begin(); + ui32 recordsInEntityCount = 0; + while (itRecord != 
chunks.end() && itColumn != columnIds->end()) { + if (itRecord->GetEntityId() < *itColumn) { + ++itRecord; + } else if (*itColumn < itRecord->GetEntityId()) { + AFL_VERIFY(!validation || recordsInEntityCount)("problem", "validation")("reason", "no_chunks_for_column")("column_id", *itColumn); + ++itColumn; + recordsInEntityCount = 0; + } else { + ++recordsInEntityCount; + aggr(*itRecord); + ++itRecord; + } + } + } else { + for (auto&& i : chunks) { + aggr(i); + } + } + } public: + ui64 GetMinMemoryForReadColumns(const std::optional>& columnIds) const; + + void InitRuntimeFeature(const ERuntimeFeature feature, const bool activity) { + if (activity) { + AddRuntimeFeature(feature); + } else { + RemoveRuntimeFeature(feature); + } + } + + void AddRuntimeFeature(const ERuntimeFeature feature) { + RuntimeFeatures |= (TRuntimeFeatures)feature; + } + + void RemoveRuntimeFeature(const ERuntimeFeature feature) { + RuntimeFeatures &= (Max() - (TRuntimeFeatures)feature); + } + + void OnAfterLoad() const { + CheckChunksOrder(Records); + CheckChunksOrder(Indexes); + } + + bool HasRuntimeFeature(const ERuntimeFeature feature) const { + if (feature == ERuntimeFeature::Optimized) { + if ((RuntimeFeatures & (TRuntimeFeatures)feature)) { + return true; + } else { + return GetTierNameDef(NOlap::NBlobOperations::TGlobal::DefaultStorageId) != NOlap::NBlobOperations::TGlobal::DefaultStorageId; + } + } + return (RuntimeFeatures & (TRuntimeFeatures)feature); + } + + void FullValidation() const; + + bool HasIndexes(const std::set& ids) const { + auto idsCopy = ids; + for (auto&& i : Indexes) { + idsCopy.erase(i.GetIndexId()); + if (idsCopy.empty()) { + return true; + } + } + return false; + } + + void ReorderChunks(); + + THashMap>>> RestoreEntityChunks(NBlobOperations::NRead::TCompositeReadBlobs& blobs, const TIndexInfo& indexInfo) const; + THashMap>> GetEntityChunks(const TIndexInfo & info) const; + + const TBlobRange RestoreBlobRange(const TBlobRangeLink16& linkRange) const { + return 
linkRange.RestoreRange(GetBlobId(linkRange.GetBlobIdxVerified())); + } + + const TUnifiedBlobId& GetBlobId(const TBlobRangeLink16::TLinkId linkId) const { + AFL_VERIFY(linkId < BlobIds.size()); + return BlobIds[linkId]; + } + + ui32 GetBlobIdsCount() const { + return BlobIds.size(); + } + + THashMap DecodeBlobAddresses(NBlobOperations::NRead::TCompositeReadBlobs&& blobs, const TIndexInfo& indexInfo) const; + + void SetStatisticsStorage(NStatistics::TPortionStorage&& storage) { + Meta.SetStatisticsStorage(std::move(storage)); + } + + const TString& GetColumnStorageId(const ui32 columnId, const TIndexInfo& indexInfo) const; + const TString& GetEntityStorageId(const ui32 entityId, const TIndexInfo& indexInfo) const; + ui64 GetTxVolume() const; // fake-correct method for determ volume on rewrite this portion in transaction progress + ui64 GetMetadataMemorySize() const; class TPage { private: @@ -46,6 +255,16 @@ class TPortionInfo { } }; + TString GetTierNameDef(const TString& defaultTierName) const { + if (GetMeta().GetTierName()) { + return GetMeta().GetTierName(); + } + return defaultTierName; + } + + static TConclusion BuildFromProto(const NKikimrColumnShardDataSharingProto::TPortionInfo& proto, const TIndexInfo& info); + void SerializeToProto(NKikimrColumnShardDataSharingProto::TPortionInfo& proto) const; + std::vector BuildPages() const; std::vector Records; @@ -58,16 +277,18 @@ class TPortionInfo { return PathId; } - void RegisterBlobId(const TChunkAddress& address, const TUnifiedBlobId& blobId) { + TBlobRangeLink16::TLinkId RegisterBlobId(const TUnifiedBlobId& blobId); + + void RegisterBlobIdx(const TChunkAddress& address, const TBlobRangeLink16::TLinkId blobIdx) { for (auto it = Records.begin(); it != Records.end(); ++it) { if (it->ColumnId == address.GetEntityId() && it->Chunk == address.GetChunkIdx()) { - it->RegisterBlobId(blobId); + it->RegisterBlobIdx(blobIdx); return; } } for (auto it = Indexes.begin(); it != Indexes.end(); ++it) { if (it->GetIndexId() == 
address.GetEntityId() && it->GetChunkIdx() == address.GetChunkIdx()) { - it->RegisterBlobId(blobId); + it->RegisterBlobIdx(blobIdx); return; } } @@ -118,10 +339,6 @@ class TPortionInfo { return Portion; } - bool HasStorageOperator() const { - return !!BlobsOperator; - } - NJson::TJsonValue SerializeToJsonVisual() const { NJson::TJsonValue result = NJson::JSON_MAP; result.InsertValue("id", Portion); @@ -137,24 +354,18 @@ class TPortionInfo { return result; } - void InitOperator(const std::shared_ptr& bOperator, const bool rewrite) { - if (rewrite) { - AFL_VERIFY(!!BlobsOperator); - } else { - AFL_VERIFY(!BlobsOperator); - } - AFL_VERIFY(!!bOperator); - BlobsOperator = bOperator; - } - static constexpr const ui32 BLOB_BYTES_LIMIT = 8 * 1024 * 1024; - const std::shared_ptr& GetBlobsStorage() const { - Y_ABORT_UNLESS(BlobsOperator); - return BlobsOperator; - } std::vector GetColumnChunksPointers(const ui32 columnId) const; + std::set GetColumnIds() const { + std::set result; + for (auto&& i : Records) { + result.emplace(i.GetColumnId()); + } + return result; + } + TSerializationStats GetSerializationStat(const ISnapshotSchema& schema) const { TSerializationStats result; for (auto&& i : Records) { @@ -167,7 +378,6 @@ class TPortionInfo { void ResetMeta() { Meta = TPortionMeta(); - BlobsOperator = nullptr; } const TPortionMeta& GetMeta() const { @@ -187,18 +397,51 @@ class TPortionInfo { return nullptr; } + std::optional GetEntityRecord(const TChunkAddress& address) const { + for (auto&& c : GetRecords()) { + if (c.GetAddress() == address) { + return TEntityChunk(c.GetAddress(), c.GetMeta().GetNumRows(), c.GetMeta().GetRawBytes(), c.GetBlobRange()); + } + } + for (auto&& c : GetIndexes()) { + if (c.GetAddress() == address) { + return TEntityChunk(c.GetAddress(), c.GetRecordsCount(), c.GetRawBytes(), c.GetBlobRange()); + } + } + return {}; + } + + bool HasEntityAddress(const TChunkAddress& address) const { + for (auto&& c : GetRecords()) { + if (c.GetAddress() == address) 
{ + return true; + } + } + for (auto&& c : GetIndexes()) { + if (c.GetAddress() == address) { + return true; + } + } + return false; + } + bool Empty() const { return Records.empty(); } bool Produced() const { return Meta.GetProduced() != TPortionMeta::EProduced::UNSPECIFIED; } - bool Valid() const { return MinSnapshot.Valid() && PathId && Portion && !Empty() && Produced() && Meta.IndexKeyStart && Meta.IndexKeyEnd; } - bool ValidSnapshotInfo() const { return MinSnapshot.Valid() && PathId && Portion; } + bool Valid() const { return ValidSnapshotInfo() && !Empty() && Produced() && Meta.IndexKeyStart && Meta.IndexKeyEnd; } + bool ValidSnapshotInfo() const { return MinSnapshotDeprecated.Valid() && PathId && Portion; } bool IsInserted() const { return Meta.GetProduced() == TPortionMeta::EProduced::INSERTED; } bool IsEvicted() const { return Meta.GetProduced() == TPortionMeta::EProduced::EVICTED; } bool CanHaveDups() const { return !Produced(); /* || IsInserted(); */ } bool CanIntersectOthers() const { return !Valid() || IsInserted() || IsEvicted(); } size_t NumChunks() const { return Records.size(); } - size_t NumBlobs() const; - TPortionInfo CopyWithFilteredColumns(const THashSet& columnIds) const; + TPortionInfo CopyBeforeChunksRebuild() const { + TPortionInfo result = *this; + result.Records.clear(); + result.Indexes.clear(); + result.BlobIds.clear(); + return result; + } bool IsEqualWithSnapshots(const TPortionInfo& item) const; @@ -206,11 +449,11 @@ class TPortionInfo { return TPortionInfo(); } - TPortionInfo(const ui64 pathId, const ui64 portionId, const TSnapshot& minSnapshot, const std::shared_ptr& blobsOperator) + TPortionInfo(const ui64 pathId, const ui64 portionId, const ui64 schemaVersion, const TSnapshot& minSnapshot) : PathId(pathId) , Portion(portionId) - , MinSnapshot(minSnapshot) - , BlobsOperator(blobsOperator) { + , MinSnapshotDeprecated(minSnapshot) + , SchemaVersion(schemaVersion) { } TString DebugString(const bool withDetails = false) const; @@ 
-219,12 +462,16 @@ class TPortionInfo { return RemoveSnapshot.Valid(); } - bool CheckForCleanup(const TSnapshot& snapshot) const { + bool IsRemovedFor(const TSnapshot& snapshot) const { if (!HasRemoveSnapshot()) { return false; + } else { + return GetRemoveSnapshotVerified() <= snapshot; } + } - return GetRemoveSnapshot() < snapshot; + bool CheckForCleanup(const TSnapshot& snapshot) const { + return IsRemovedFor(snapshot); } bool CheckForCleanup() const { @@ -260,22 +507,36 @@ class TPortionInfo { DeprecatedGranuleId = granuleId; } - const TSnapshot& GetMinSnapshot() const { - return MinSnapshot; + const TSnapshot& GetMinSnapshotDeprecated() const { + return MinSnapshotDeprecated; } - const TSnapshot& GetRemoveSnapshot() const { + const TSnapshot& GetRemoveSnapshotVerified() const { + AFL_VERIFY(HasRemoveSnapshot()); return RemoveSnapshot; } - void SetMinSnapshot(const TSnapshot& snap) { + std::optional GetRemoveSnapshotOptional() const { + if (RemoveSnapshot.Valid()) { + return RemoveSnapshot; + } else { + return {}; + } + } + + ui64 GetSchemaVersionVerified() const { + AFL_VERIFY(SchemaVersion); + return SchemaVersion.value(); + } + + void SetMinSnapshotDeprecated(const TSnapshot& snap) { Y_ABORT_UNLESS(snap.Valid()); - MinSnapshot = snap; + MinSnapshotDeprecated = snap; } - void SetMinSnapshot(const ui64 planStep, const ui64 txId) { - MinSnapshot = TSnapshot(planStep, txId); - Y_ABORT_UNLESS(MinSnapshot.Valid()); + void SetSchemaVersion(const ui64 version) { + AFL_VERIFY(version); + SchemaVersion = version; } void SetRemoveSnapshot(const TSnapshot& snap) { @@ -290,34 +551,12 @@ class TPortionInfo { Y_ABORT_UNLESS(!wasValid || RemoveSnapshot.Valid()); } - std::pair BlobsSizes() const { - ui32 sum = 0; - ui32 max = 0; - for (const auto& rec : Records) { - sum += rec.BlobRange.Size; - max = Max(max, rec.BlobRange.Size); - } - return {sum, max}; - } - - ui64 GetBlobBytes() const noexcept { - ui64 sum = 0; - for (const auto& rec : Records) { - sum += 
rec.BlobRange.Size; - } - return sum; - } - - ui64 BlobsBytes() const noexcept { - return GetBlobBytes(); - } - bool IsVisible(const TSnapshot& snapshot) const { if (Empty()) { return false; } - bool visible = (MinSnapshot <= snapshot); + bool visible = (Meta.RecordSnapshotMin <= snapshot); if (visible && RemoveSnapshot.Valid()) { visible = snapshot < RemoveSnapshot; } @@ -359,23 +598,46 @@ class TPortionInfo { return *Meta.RecordSnapshotMax; } - THashSet GetBlobIds() const { - THashSet result; - for (auto&& i : Records) { - result.emplace(i.BlobRange.BlobId); - } - for (auto&& i : Indexes) { - result.emplace(i.GetBlobRange().BlobId); - } + + THashMap> GetBlobIdsByStorage(const TIndexInfo& indexInfo) const { + THashMap> result; + FillBlobIdsByStorage(result, indexInfo); return result; } + class TSchemaCursor { + const NOlap::TVersionedIndex& VersionedIndex; + ISnapshotSchema::TPtr CurrentSchema; + TSnapshot LastSnapshot = TSnapshot::Zero(); + public: + TSchemaCursor(const NOlap::TVersionedIndex& versionedIndex) + : VersionedIndex(versionedIndex) + {} + + ISnapshotSchema::TPtr GetSchema(const TPortionInfo& portion) { + if (!CurrentSchema || portion.MinSnapshotDeprecated != LastSnapshot) { + CurrentSchema = portion.GetSchema(VersionedIndex); + LastSnapshot = portion.GetMinSnapshotDeprecated(); + } + AFL_VERIFY(!!CurrentSchema)("portion", portion.DebugString()); + return CurrentSchema; + } + }; + + ISnapshotSchema::TPtr GetSchema(const TVersionedIndex& index) const; + + void FillBlobRangesByStorage(THashMap>& result, const TIndexInfo& indexInfo) const; + void FillBlobRangesByStorage(THashMap>& result, const TVersionedIndex& index) const; + + void FillBlobIdsByStorage(THashMap>& result, const TIndexInfo& indexInfo) const; + void FillBlobIdsByStorage(THashMap>& result, const TVersionedIndex& index) const; + ui32 GetRecordsCount() const { ui32 result = 0; std::optional columnIdFirst; for (auto&& i : Records) { if (!columnIdFirst || *columnIdFirst == i.ColumnId) { - 
result += i.GetMeta().GetNumRowsVerified(); + result += i.GetMeta().GetNumRows(); columnIdFirst = i.ColumnId; } } @@ -390,34 +652,54 @@ class TPortionInfo { ui32 result = 0; for (auto&& i : Records) { if (columnId == i.ColumnId) { - result += i.GetMeta().GetNumRowsVerified(); + result += i.GetMeta().GetNumRows(); } } return result; } - ui64 GetIndexBytes(const std::set& columnIds) const; - - ui64 GetRawBytes(const std::vector& columnIds) const; - ui64 GetRawBytes(const std::set& columnIds) const; - ui64 GetRawBytes() const { - ui64 result = 0; - for (auto&& i : Records) { - result += i.GetMeta().GetRawBytesVerified(); + ui64 GetIndexRawBytes(const std::optional>& columnIds = {}, const bool validation = true) const; + ui64 GetIndexBlobBytes() const noexcept { + ui64 sum = 0; + for (const auto& rec : Indexes) { + sum += rec.GetBlobRange().Size; } - return result; + return sum; } - ui64 RawBytesSum() const { - return GetRawBytes(); + ui64 GetColumnRawBytes(const std::vector& columnIds, const bool validation = true) const; + ui64 GetColumnRawBytes(const std::optional>& columnIds = {}, const bool validation = true) const; + + ui64 GetColumnBlobBytes(const std::vector& columnIds, const bool validation = true) const; + ui64 GetColumnBlobBytes(const std::optional>& columnIds = {}, const bool validation = true) const; + + ui64 GetTotalBlobBytes() const noexcept { + return GetIndexBlobBytes() + GetColumnBlobBytes(); } + ui64 GetTotalRawBytes() const { + return GetColumnRawBytes() + GetIndexRawBytes(); + } public: class TAssembleBlobInfo { private: + YDB_READONLY_DEF(std::optional, ExpectedRowsCount); ui32 NullRowsCount = 0; TString Data; public: + ui32 GetExpectedRowsCountVerified() const { + AFL_VERIFY(ExpectedRowsCount); + return *ExpectedRowsCount; + } + + void SetExpectedRecordsCount(const ui32 expectedRowsCount) { + AFL_VERIFY(!ExpectedRowsCount); + ExpectedRowsCount = expectedRowsCount; + if (!Data) { + AFL_VERIFY(*ExpectedRowsCount == NullRowsCount); + } + } + 
TAssembleBlobInfo(const ui32 rowsCount) : NullRowsCount(rowsCount) { AFL_VERIFY(NullRowsCount); @@ -445,6 +727,7 @@ class TPortionInfo { } std::shared_ptr BuildRecordBatch(const TColumnLoader& loader) const; + TDeserializeChunkedArray::TChunk BuildDeserializeChunk(const std::shared_ptr& loader) const; }; class TPreparedColumn { @@ -472,6 +755,7 @@ class TPortionInfo { } std::shared_ptr Assemble() const; + std::shared_ptr AssembleForSeqAccess() const; }; class TPreparedBatchData { @@ -538,6 +822,7 @@ class TPortionInfo { std::shared_ptr Assemble(const TAssembleOptions& options = {}) const; std::shared_ptr AssembleTable(const TAssembleOptions& options = {}) const; + std::shared_ptr AssembleForSeqAccess() const; }; class TColumnAssemblingInfo { @@ -545,6 +830,7 @@ class TPortionInfo { std::vector BlobsInfo; YDB_READONLY(ui32, ColumnId, 0); const ui32 NumRows; + ui32 NumRowsByChunks = 0; const std::shared_ptr DataLoader; const std::shared_ptr ResultLoader; public: @@ -564,8 +850,10 @@ class TPortionInfo { return ResultLoader->GetField(); } - void AddBlobInfo(const ui32 expectedChunkIdx, TAssembleBlobInfo&& info) { + void AddBlobInfo(const ui32 expectedChunkIdx, const ui32 expectedRecordsCount, TAssembleBlobInfo&& info) { AFL_VERIFY(expectedChunkIdx == BlobsInfo.size()); + info.SetExpectedRecordsCount(expectedRecordsCount); + NumRowsByChunks += expectedRecordsCount; BlobsInfo.emplace_back(std::move(info)); } @@ -574,58 +862,18 @@ class TPortionInfo { BlobsInfo.emplace_back(TAssembleBlobInfo(NumRows)); return TPreparedColumn(std::move(BlobsInfo), ResultLoader); } else { + AFL_VERIFY(NumRowsByChunks == NumRows)("by_chunks", NumRowsByChunks)("expected", NumRows); AFL_VERIFY(DataLoader); return TPreparedColumn(std::move(BlobsInfo), DataLoader); } } }; - template - TPreparedBatchData PrepareForAssemble(const ISnapshotSchema& dataSchema, const ISnapshotSchema& resultSchema, - THashMap& blobsData) const { - std::vector columns; - auto arrowResultSchema = 
resultSchema.GetSchema(); - columns.reserve(arrowResultSchema->num_fields()); - const ui32 rowsCount = NumRows(); - for (auto&& i : arrowResultSchema->fields()) { - columns.emplace_back(rowsCount, dataSchema.GetColumnLoaderOptional(i->name()), resultSchema.GetColumnLoaderOptional(i->name())); - } - { - int skipColumnId = -1; - TColumnAssemblingInfo* currentAssembler = nullptr; - for (auto& rec : Records) { - if (skipColumnId == (int)rec.ColumnId) { - continue; - } - if (!currentAssembler || rec.ColumnId != currentAssembler->GetColumnId()) { - const i32 resultPos = resultSchema.GetFieldIndex(rec.ColumnId); - if (resultPos < 0) { - skipColumnId = rec.ColumnId; - continue; - } - AFL_VERIFY((ui32)resultPos < columns.size()); - currentAssembler = &columns[resultPos]; - } - auto it = blobsData.find(rec.BlobRange); - Y_ABORT_UNLESS(it != blobsData.end()); - currentAssembler->AddBlobInfo(rec.Chunk, std::move(it->second)); - blobsData.erase(it); - } - } - - // Make chunked arrays for columns - std::vector preparedColumns; - preparedColumns.reserve(columns.size()); - for (auto& c : columns) { - preparedColumns.emplace_back(c.Compile()); - } - - return TPreparedBatchData(std::move(preparedColumns), arrowResultSchema, rowsCount); - } + TPreparedBatchData PrepareForAssemble(const ISnapshotSchema& dataSchema, const ISnapshotSchema& resultSchema, THashMap& blobsData) const; + TPreparedBatchData PrepareForAssemble(const ISnapshotSchema& dataSchema, const ISnapshotSchema& resultSchema, THashMap& blobsData) const; - std::shared_ptr AssembleInBatch(const ISnapshotSchema& dataSchema, - const ISnapshotSchema& resultSchema, - THashMap& data) const { + std::shared_ptr AssembleInBatch(const ISnapshotSchema& dataSchema, const ISnapshotSchema& resultSchema, + THashMap& data) const { auto batch = PrepareForAssemble(dataSchema, resultSchema, data).Assemble(); Y_ABORT_UNLESS(batch->Validate().ok()); return batch; diff --git a/ydb/core/tx/columnshard/engines/portions/with_blobs.cpp 
b/ydb/core/tx/columnshard/engines/portions/with_blobs.cpp index 9d8adcac99f2..7a29d2779c79 100644 --- a/ydb/core/tx/columnshard/engines/portions/with_blobs.cpp +++ b/ydb/core/tx/columnshard/engines/portions/with_blobs.cpp @@ -1,6 +1,10 @@ #include "with_blobs.h" #include #include +#include +#include +#include +#include namespace NKikimr::NOlap { @@ -8,40 +12,38 @@ void TPortionInfoWithBlobs::TBlobInfo::RestoreChunk(const TPortionInfoWithBlobs& Y_ABORT_UNLESS(!ResultBlob); const TString& data = chunk->GetData(); Size += data.size(); - auto address = chunk->GetChunkAddress(); - Y_ABORT_UNLESS(owner.GetPortionInfo().GetRecordPointer(address)); - Y_ABORT_UNLESS(Chunks.emplace(address, chunk).second); + auto address = chunk->GetChunkAddressVerified(); + AFL_VERIFY(owner.GetPortionInfo().HasEntityAddress(address))("address", address.DebugString()); + AFL_VERIFY(Chunks.emplace(address, chunk).second)("address", address.DebugString()); ChunksOrdered.emplace_back(chunk); } void TPortionInfoWithBlobs::TBlobInfo::AddChunk(TPortionInfoWithBlobs& owner, const std::shared_ptr& chunk) { AFL_VERIFY(chunk); Y_ABORT_UNLESS(!ResultBlob); - TBlobRange bRange; const TString& data = chunk->GetData(); - bRange.Offset = Size; - bRange.Size = data.size(); - + TBlobRangeLink16 bRange(Size, data.size()); Size += data.size(); - Y_ABORT_UNLESS(Chunks.emplace(chunk->GetChunkAddress(), chunk).second); + Y_ABORT_UNLESS(Chunks.emplace(chunk->GetChunkAddressVerified(), chunk).second); ChunksOrdered.emplace_back(chunk); - chunk->AddIntoPortion(bRange, owner.PortionInfo); + chunk->AddIntoPortionBeforeBlob(bRange, owner.PortionInfo); } void TPortionInfoWithBlobs::TBlobInfo::RegisterBlobId(TPortionInfoWithBlobs& owner, const TUnifiedBlobId& blobId) { + const TBlobRangeLink16::TLinkId idx = owner.PortionInfo.RegisterBlobId(blobId); for (auto&& i : Chunks) { - owner.PortionInfo.RegisterBlobId(i.first, blobId); + owner.PortionInfo.RegisterBlobIdx(i.first, idx); } } void 
TPortionInfoWithBlobs::TBlobInfo::ExtractEntityChunks(const ui32 entityId, std::map>& resultMap) { const auto pred = [this, &resultMap, entityId](const std::shared_ptr& chunk) { if (chunk->GetEntityId() == entityId) { - resultMap.emplace(chunk->GetChunkAddress(), chunk); - Chunks.erase(chunk->GetChunkAddress()); + resultMap.emplace(chunk->GetChunkAddressVerified(), chunk); + Chunks.erase(chunk->GetChunkAddressVerified()); return true; } else { return false; @@ -54,10 +56,10 @@ std::shared_ptr TPortionInfoWithBlobs::GetBatch(const ISnaps Y_ABORT_UNLESS(data); if (columnNames.empty()) { if (!CachedBatch) { - THashMap blobs; + THashMap blobs; for (auto&& i : PortionInfo.Records) { - blobs[i.BlobRange] = GetBlobByRangeVerified(i.ColumnId, i.Chunk); - Y_ABORT_UNLESS(blobs[i.BlobRange].size() == i.BlobRange.Size); + blobs[i.GetAddress()] = GetBlobByRangeVerified(i.ColumnId, i.Chunk); + Y_ABORT_UNLESS(blobs[i.GetAddress()].size() == i.BlobRange.Size); } CachedBatch = PortionInfo.AssembleInBatch(*data, result, blobs); Y_DEBUG_ABORT_UNLESS(NArrow::IsSortedAndUnique(*CachedBatch, result.GetIndexInfo().GetReplaceKey())); @@ -73,99 +75,59 @@ std::shared_ptr TPortionInfoWithBlobs::GetBatch(const ISnaps return result; } else { auto filteredSchema = std::make_shared(data, columnNames); - THashMap blobs; + THashMap blobs; for (auto&& i : PortionInfo.Records) { - blobs[i.BlobRange] = GetBlobByRangeVerified(i.ColumnId, i.Chunk); - Y_ABORT_UNLESS(blobs[i.BlobRange].size() == i.BlobRange.Size); + blobs[i.GetAddress()] = GetBlobByRangeVerified(i.ColumnId, i.Chunk); + Y_ABORT_UNLESS(blobs[i.GetAddress()].size() == i.BlobRange.Size); } return PortionInfo.AssembleInBatch(*data, *filteredSchema, blobs); } } -NKikimr::NOlap::TPortionInfoWithBlobs TPortionInfoWithBlobs::RestorePortion(const TPortionInfo& portion, THashMap& blobs) { +NKikimr::NOlap::TPortionInfoWithBlobs TPortionInfoWithBlobs::RestorePortion(const TPortionInfo& portion, NBlobOperations::NRead::TCompositeReadBlobs& blobs, 
const TIndexInfo& indexInfo, const std::shared_ptr& operators) { TPortionInfoWithBlobs result(portion); - const auto pred = [](const TColumnRecord& l, const TColumnRecord& r) { - return l.GetAddress() < r.GetAddress(); - }; - std::sort(result.PortionInfo.Records.begin(), result.PortionInfo.Records.end(), pred); - - THashMap> recordsByBlob; - for (auto&& c : result.PortionInfo.Records) { - auto& blobRecords = recordsByBlob[c.BlobRange.BlobId]; - blobRecords.emplace_back(&c); - } - - const auto predOffset = [](const TColumnRecord* l, const TColumnRecord* r) { - return l->BlobRange.Offset < r->BlobRange.Offset; - }; - - for (auto&& i : recordsByBlob) { - std::sort(i.second.begin(), i.second.end(), predOffset); - auto builder = result.StartBlob(); - for (auto&& d : i.second) { - auto itBlob = blobs.find(d->BlobRange); - Y_ABORT_UNLESS(itBlob != blobs.end()); - builder.RestoreChunk(std::make_shared(*d, itBlob->second)); - blobs.erase(itBlob); + THashMap>>> records = result.PortionInfo.RestoreEntityChunks(blobs, indexInfo); + for (auto&& [storageId, recordsByBlob] : records) { + auto storage = operators->GetOperatorVerified(storageId); + for (auto&& i : recordsByBlob) { + auto builder = result.StartBlob(storage); + for (auto&& d : i.second) { + builder.RestoreChunk(d); + } } } return result; } -std::vector TPortionInfoWithBlobs::RestorePortions(const std::vector& portions, THashMap& blobs) { +std::vector TPortionInfoWithBlobs::RestorePortions(const std::vector& portions, NBlobOperations::NRead::TCompositeReadBlobs& blobs, + const TVersionedIndex& tables, const std::shared_ptr& operators) { std::vector result; for (auto&& i : portions) { - result.emplace_back(RestorePortion(i, blobs)); + const auto schema = i.GetSchema(tables); + result.emplace_back(RestorePortion(i, blobs, schema->GetIndexInfo(), operators)); } return result; } -NKikimr::NOlap::TPortionInfoWithBlobs TPortionInfoWithBlobs::BuildByBlobs(std::vector>>& chunksByBlobs, std::shared_ptr batch, const ui64 
granule, - const TSnapshot& snapshot, const std::shared_ptr& bStorageOperator) { - TPortionInfoWithBlobs result(TPortionInfo(granule, 0, snapshot, bStorageOperator), batch); - for (auto& blob : chunksByBlobs) { - auto blobInfo = result.StartBlob(); - for (auto&& chunk : blob) { - blobInfo.AddChunk(chunk); - } - } - - const auto pred = [](const TColumnRecord& l, const TColumnRecord& r) { - return l.GetAddress() < r.GetAddress(); - }; - std::sort(result.GetPortionInfo().Records.begin(), result.GetPortionInfo().Records.end(), pred); +NKikimr::NOlap::TPortionInfoWithBlobs TPortionInfoWithBlobs::BuildByBlobs(std::vector&& chunks, + std::shared_ptr batch, const ui64 granule, const ui64 schemaVersion, const TSnapshot& snapshot, const std::shared_ptr& operators) +{ + TPortionInfoWithBlobs result = BuildByBlobs(std::move(chunks), TPortionInfo(granule, 0, schemaVersion, snapshot), operators); + result.InitBatchCached(batch); return result; } -std::optional TPortionInfoWithBlobs::ChangeSaver(ISnapshotSchema::TPtr currentSchema, const TSaverContext& saverContext) const { - TPortionInfoWithBlobs result(PortionInfo, CachedBatch); - result.PortionInfo.Records.clear(); - std::optional bBuilder; - for (auto& rec : PortionInfo.Records) { - auto field = currentSchema->GetFieldByColumnIdVerified(rec.ColumnId); - - const TString blobOriginal = GetBlobByRangeVerified(rec.ColumnId, rec.Chunk); - { - auto rb = NArrow::TStatusValidator::GetValid(currentSchema->GetColumnLoaderVerified(rec.ColumnId)->Apply(blobOriginal)); - auto columnSaver = currentSchema->GetColumnSaver(rec.ColumnId, saverContext); - const TString newBlob = columnSaver.Apply(rb); - if (newBlob.size() >= TPortionInfo::BLOB_BYTES_LIMIT) { - return {}; - } - if (!bBuilder || result.GetBlobs().back().GetSize() + newBlob.size() >= TPortionInfo::BLOB_BYTES_LIMIT) { - bBuilder = result.StartBlob(); - } - Y_ABORT_UNLESS(rb); - Y_ABORT_UNLESS(rb->num_columns() == 1); - - bBuilder->AddChunk(std::make_shared(rec, newBlob)); 
+TPortionInfoWithBlobs TPortionInfoWithBlobs::BuildByBlobs(std::vector&& chunks, const TPortionInfo& basePortion, + const std::shared_ptr& operators) { + TPortionInfoWithBlobs result(basePortion.CopyBeforeChunksRebuild()); + for (auto&& blob : chunks) { + auto storage = operators->GetOperatorVerified(blob.GetGroupName()); + auto blobInfo = result.StartBlob(storage); + for (auto&& chunk : blob.GetChunks()) { + blobInfo.AddChunk(chunk); } } - const auto pred = [](const TColumnRecord& l, const TColumnRecord& r) { - return l.GetAddress() < r.GetAddress(); - }; - std::sort(result.PortionInfo.Records.begin(), result.PortionInfo.Records.end(), pred); - + result.GetPortionInfo().ReorderChunks(); return result; } @@ -180,7 +142,7 @@ std::vector> TPortionInfoWithBlobs::GetEntity } std::vector> result; for (auto&& i : sortedChunks) { - AFL_VERIFY(i.second->GetChunkIdx() == result.size())("idx", i.second->GetChunkIdx())("size", result.size()); + AFL_VERIFY(i.second->GetChunkIdxVerified() == result.size())("idx", i.second->GetChunkIdxVerified())("size", result.size()); result.emplace_back(i.second); } return result; @@ -204,4 +166,76 @@ bool TPortionInfoWithBlobs::ExtractColumnChunks(const ui32 columnId, std::vector std::swap(chunksLocal, chunks); return true; } + +void TPortionInfoWithBlobs::FillStatistics(const TIndexInfo& index) { + NStatistics::TPortionStorage storage; + for (auto&& i : index.GetStatisticsByName()) { + THashMap>> data; + for (auto&& entityId : i.second->GetEntityIds()) { + data.emplace(entityId, GetEntityChunks(entityId)); + } + i.second->FillStatisticsData(data, storage, index); + } + PortionInfo.SetStatisticsStorage(std::move(storage)); +} + +TPortionInfoWithBlobs TPortionInfoWithBlobs::SyncPortion(TPortionInfoWithBlobs&& source, + const ISnapshotSchema::TPtr& from, const ISnapshotSchema::TPtr& to, const TString& targetTier, const std::shared_ptr& storages, + std::shared_ptr counters) { + if (from->GetVersion() == to->GetVersion() && targetTier == 
source.GetPortionInfo().GetTierNameDef(IStoragesManager::DefaultStorageId)) { + return std::move(source); + } + NYDBTest::TControllers::GetColumnShardController()->OnPortionActualization(source.PortionInfo); + auto pages = source.PortionInfo.BuildPages(); + std::vector pageSizes; + for (auto&& p : pages) { + pageSizes.emplace_back(p.GetRecordsCount()); + } + THashMap>> columnChunks; + for (auto&& i : source.Blobs) { + for (auto&& c : i.GetChunks()) { + columnChunks[c.first.GetColumnId()].emplace_back(c.second); + } + } + + THashMap>> entityChunksNew; + for (auto&& i : to->GetIndexInfo().GetColumnIds()) { + auto it = columnChunks.find(i); + std::vector> newChunks; + if (it != columnChunks.end()) { + newChunks = to->GetIndexInfo().ActualizeColumnData(it->second, from->GetIndexInfo(), i); + } else { + newChunks = to->GetIndexInfo().MakeEmptyChunks(i, pageSizes, to->GetIndexInfo().GetColumnFeaturesVerified(i)); + } + AFL_VERIFY(entityChunksNew.emplace(i, std::move(newChunks)).second); + } + + for (auto&& i : to->GetIndexInfo().GetIndexes()) { + if (from->GetIndexInfo().HasIndexId(i.first)) { + continue; + } + to->GetIndexInfo().AppendIndex(entityChunksNew, i.first); + } + + auto schemaTo = std::make_shared(to, std::make_shared()); + TGeneralSerializedSlice slice(entityChunksNew, schemaTo, counters); + const NSplitter::TEntityGroups groups = to->GetIndexInfo().GetEntityGroupsByStorageId(targetTier, *storages); + TPortionInfoWithBlobs result = TPortionInfoWithBlobs::BuildByBlobs(slice.GroupChunksByBlobs(groups), source.PortionInfo, storages); + result.GetPortionInfo().SetMinSnapshotDeprecated(to->GetSnapshot()); + result.GetPortionInfo().SetSchemaVersion(to->GetVersion()); + result.GetPortionInfo().MutableMeta().SetTierName(targetTier); + + NStatistics::TPortionStorage storage; + for (auto&& i : to->GetIndexInfo().GetStatisticsByName()) { + auto it = from->GetIndexInfo().GetStatisticsByName().find(i.first); + if (it != from->GetIndexInfo().GetStatisticsByName().end()) { 
+ i.second->CopyData(it->second.GetCursorVerified(), source.PortionInfo.GetMeta().GetStatisticsStorage(), storage); + } else { + i.second->FillStatisticsData(entityChunksNew, storage, to->GetIndexInfo()); + } + } + result.PortionInfo.MutableMeta().ResetStatisticsStorage(std::move(storage)); + return result; +} + } diff --git a/ydb/core/tx/columnshard/engines/portions/with_blobs.h b/ydb/core/tx/columnshard/engines/portions/with_blobs.h index 1a7a6b7192cb..83e2dc68fda8 100644 --- a/ydb/core/tx/columnshard/engines/portions/with_blobs.h +++ b/ydb/core/tx/columnshard/engines/portions/with_blobs.h @@ -1,11 +1,17 @@ #pragma once #include "portion_info.h" -#include -#include #include +#include +#include +#include +#include + +#include namespace NKikimr::NOlap { +class TVersionedIndex; + class TPortionInfoWithBlobs { public: class TBlobInfo { @@ -13,26 +19,36 @@ class TPortionInfoWithBlobs { using TBlobChunks = std::map>; YDB_READONLY(ui64, Size, 0); YDB_READONLY_DEF(TBlobChunks, Chunks); + YDB_READONLY_DEF(std::shared_ptr, Operator); std::vector> ChunksOrdered; mutable std::optional ResultBlob; void AddChunk(TPortionInfoWithBlobs& owner, const std::shared_ptr& chunk); void RestoreChunk(const TPortionInfoWithBlobs& owner, const std::shared_ptr& chunk); public: + TBlobInfo(const std::shared_ptr& bOperator) + : Operator(bOperator) + { + + } + class TBuilder { private: TBlobInfo* OwnerBlob; TPortionInfoWithBlobs* OwnerPortion; - public: TBuilder(TBlobInfo& blob, TPortionInfoWithBlobs& portion) : OwnerBlob(&blob) , OwnerPortion(&portion) { } + ui64 GetSize() const { + return OwnerBlob->GetSize(); + } + void AddChunk(const std::shared_ptr& chunk) { return OwnerBlob->AddChunk(*OwnerPortion, chunk); } - void RestoreChunk(const std::shared_ptr& chunk) { + void RestoreChunk(const std::shared_ptr& chunk) { OwnerBlob->RestoreChunk(*OwnerPortion, chunk); } }; @@ -71,29 +87,40 @@ class TPortionInfoWithBlobs { PortionInfo = portionInfo; } - TBlobInfo::TBuilder StartBlob() { - 
Blobs.emplace_back(TBlobInfo()); + TBlobInfo::TBuilder StartBlob(const std::shared_ptr& bOperator) { + Blobs.emplace_back(TBlobInfo(bOperator)); return TBlobInfo::TBuilder(Blobs.back(), *this); } public: - static std::vector RestorePortions(const std::vector& portions, THashMap& blobs); - static TPortionInfoWithBlobs RestorePortion(const TPortionInfo& portions, THashMap& blobs); + void InitBatchCached(const std::shared_ptr& batch) { + if (!batch) { + return; + } + CachedBatch = batch; + } + + static std::vector RestorePortions(const std::vector& portions, NBlobOperations::NRead::TCompositeReadBlobs& blobs, + const TVersionedIndex& tables, const std::shared_ptr& operators); + static TPortionInfoWithBlobs RestorePortion(const TPortionInfo& portion, NBlobOperations::NRead::TCompositeReadBlobs& blobs, + const TIndexInfo& indexInfo, const std::shared_ptr& operators); std::shared_ptr GetBatch(const ISnapshotSchema::TPtr& data, const ISnapshotSchema& result, const std::set& columnNames = {}) const; + static TPortionInfoWithBlobs SyncPortion(TPortionInfoWithBlobs&& source, + const ISnapshotSchema::TPtr& from, const ISnapshotSchema::TPtr& to, const TString& targetTier, const std::shared_ptr& storages, + std::shared_ptr counters); std::vector> GetEntityChunks(const ui32 entityId) const; bool ExtractColumnChunks(const ui32 columnId, std::vector& records, std::vector>& chunks); - ui64 GetSize() const { - return PortionInfo.BlobsBytes(); - } + void FillStatistics(const TIndexInfo& index); - static TPortionInfoWithBlobs BuildByBlobs(std::vector>>& chunksByBlobs, std::shared_ptr batch, const ui64 granule, const TSnapshot& snapshot, - const std::shared_ptr& bStorageOperator); + static TPortionInfoWithBlobs BuildByBlobs(std::vector&& chunks, + std::shared_ptr batch, const ui64 granule, const ui64 schemaVersion, const TSnapshot& snapshot, const std::shared_ptr& operators); - std::optional ChangeSaver(ISnapshotSchema::TPtr currentSchema, const TSaverContext& saverContext) const; + 
static TPortionInfoWithBlobs BuildByBlobs(std::vector&& chunks, const TPortionInfo& basePortion, + const std::shared_ptr& operators); const TString& GetBlobByRangeVerified(const ui32 columnId, const ui32 chunkId) const { for (auto&& b : Blobs) { diff --git a/ydb/core/tx/columnshard/engines/portions/ya.make b/ydb/core/tx/columnshard/engines/portions/ya.make index 7a6c96a9a8a2..96254fce4299 100644 --- a/ydb/core/tx/columnshard/engines/portions/ya.make +++ b/ydb/core/tx/columnshard/engines/portions/ya.make @@ -6,12 +6,14 @@ SRCS( with_blobs.cpp meta.cpp common.cpp + index_chunk.cpp ) PEERDIR( ydb/core/tx/columnshard/engines/scheme ydb/core/tx/columnshard/splitter ydb/core/tx/columnshard/common + ydb/core/tx/columnshard/data_sharing/protos ydb/core/tablet_flat ) diff --git a/ydb/core/tx/columnshard/engines/reader/abstract/abstract.cpp b/ydb/core/tx/columnshard/engines/reader/abstract/abstract.cpp new file mode 100644 index 000000000000..04715867e012 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/reader/abstract/abstract.cpp @@ -0,0 +1,9 @@ +#include "abstract.h" + +namespace NKikimr::NOlap::NReader { + +const TReadStats& TScanIteratorBase::GetStats() const { + return Default(); +} + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/reader/abstract/abstract.h b/ydb/core/tx/columnshard/engines/reader/abstract/abstract.h new file mode 100644 index 000000000000..2681626b6d4f --- /dev/null +++ b/ydb/core/tx/columnshard/engines/reader/abstract/abstract.h @@ -0,0 +1,45 @@ +#pragma once +#include +#include +#include + +namespace NKikimr::NOlap::NReader { + +class TScanIteratorBase { +protected: + virtual void DoOnSentDataFromInterval(const ui32 /*intervalIdx*/) const { + + } +public: + virtual ~TScanIteratorBase() = default; + + virtual void Apply(IDataTasksProcessor::ITask::TPtr /*processor*/) { + + } + + virtual TConclusionStatus Start() = 0; + + virtual const TReadStats& GetStats() const; + + void OnSentDataFromInterval(const std::optional 
intervalIdx) const { + if (intervalIdx) { + DoOnSentDataFromInterval(*intervalIdx); + } + } + + virtual std::optional GetAvailableResultsCount() const { + return {}; + } + virtual bool Finished() const = 0; + virtual TConclusion> GetBatch() = 0; + virtual void PrepareResults() { + + } + virtual TConclusion ReadNextInterval() { return false; } + virtual TString DebugString(const bool verbose = false) const { + Y_UNUSED(verbose); + return "NO_DATA"; + } +}; + +} diff --git a/ydb/core/tx/columnshard/columnshard__read_base.cpp b/ydb/core/tx/columnshard/engines/reader/abstract/constructor.cpp similarity index 50% rename from ydb/core/tx/columnshard/columnshard__read_base.cpp rename to ydb/core/tx/columnshard/engines/reader/abstract/constructor.cpp index 000b8e7e63c8..dba163fcfd15 100644 --- a/ydb/core/tx/columnshard/columnshard__read_base.cpp +++ b/ydb/core/tx/columnshard/engines/reader/abstract/constructor.cpp @@ -1,37 +1,10 @@ -#include -#include -#include +#include "constructor.h" +#include -namespace NKikimr::NColumnShard { - - -std::shared_ptr -TTxReadBase::PrepareReadMetadata(const NOlap::TReadDescription& read, - const std::unique_ptr& insertTable, - const std::unique_ptr& index, - TString& error, const bool isReverse) const { - if (!insertTable || !index) { - return nullptr; - } - - if (read.GetSnapshot().GetPlanStep() < Self->GetMinReadStep()) { - error = TStringBuilder() << "Snapshot too old: " << read.GetSnapshot(); - return nullptr; - } - - NOlap::TDataStorageAccessor dataAccessor(insertTable, index); - auto readMetadata = std::make_shared(index->GetVersionedIndex(), read.GetSnapshot(), - isReverse ? 
NOlap::TReadMetadata::ESorting::DESC : NOlap::TReadMetadata::ESorting::ASC, read.GetProgram()); - - if (!readMetadata->Init(read, dataAccessor, error)) { - return nullptr; - } - return readMetadata; -} - -bool TTxReadBase::ParseProgram(NKikimrSchemeOp::EOlapProgramType programType, - TString serializedProgram, NOlap::TReadDescription& read, const NOlap::IColumnResolver& columnResolver) { +namespace NKikimr::NOlap::NReader { +NKikimr::TConclusionStatus IScannerConstructor::ParseProgram(const TVersionedIndex* vIndex, + const NKikimrSchemeOp::EOlapProgramType programType, const TString& serializedProgram, TReadDescription& read, const IColumnResolver& columnResolver) const { AFL_VERIFY(!read.ColumnIds.size() || !read.ColumnNames.size()); std::vector names; std::set namesChecker; @@ -44,17 +17,16 @@ bool TTxReadBase::ParseProgram(NKikimrSchemeOp::EOlapProgramType programType, names.emplace_back(i); AFL_VERIFY(namesChecker.emplace(names.back()).second); } - NOlap::TProgramContainer container; + TProgramContainer container; AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "overriden_columns")("columns", JoinSeq(",", names)); container.OverrideProcessingColumns(std::vector(names.begin(), names.end())); read.SetProgram(std::move(container)); - return true; + return TConclusionStatus::Success(); } else { - NOlap::TProgramContainer ssaProgram; + TProgramContainer ssaProgram; TString error; if (!ssaProgram.Init(columnResolver, programType, serializedProgram, error)) { - ErrorDescription = TStringBuilder() << "Can't parse SsaProgram at " << Self->TabletID() << " / " << error; - return false; + return TConclusionStatus::Fail(TStringBuilder() << "Can't parse SsaProgram: " << error); } if (names.size()) { @@ -65,8 +37,8 @@ bool TTxReadBase::ParseProgram(NKikimrSchemeOp::EOlapProgramType programType, } } //its possible dont use columns from filter where pk field compare with null and remove from PKFilter and program, but stay in kqp columns request - if 
(Self->TablesManager.HasPrimaryIndex()) { - for (auto&& i : Self->TablesManager.GetIndexInfo(read.GetSnapshot()).GetReplaceKey()->field_names()) { + if (vIndex) { + for (auto&& i : vIndex->GetSchema(read.GetSnapshot())->GetIndexInfo().GetReplaceKey()->field_names()) { const TString cId(i.data(), i.size()); namesChecker.erase(cId); programColumns.erase(cId); @@ -78,21 +50,31 @@ bool TTxReadBase::ParseProgram(NKikimrSchemeOp::EOlapProgramType programType, }; if (namesChecker.size() != programColumns.size()) { - ErrorDescription = getDiffColumnsMessage(); - return false; + return TConclusionStatus::Fail(getDiffColumnsMessage()); } for (auto&& i : namesChecker) { if (!programColumns.contains(i)) { - ErrorDescription = getDiffColumnsMessage(); - return false; + return TConclusionStatus::Fail(getDiffColumnsMessage()); } } } read.SetProgram(std::move(ssaProgram)); - return true; + return TConclusionStatus::Success(); } } +NKikimr::TConclusion> IScannerConstructor::BuildReadMetadata(const NColumnShard::TColumnShard* self, const TReadDescription& read) const { + TConclusion> result = DoBuildReadMetadata(self, read); + if (result.IsFail()) { + return result; + } else if (!*result) { + return result.DetachResult(); + } else { + (*result)->Limit = ItemsLimit; + return result.DetachResult(); + } } + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/reader/abstract/constructor.h b/ydb/core/tx/columnshard/engines/reader/abstract/constructor.h new file mode 100644 index 000000000000..584ea78aa5a0 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/reader/abstract/constructor.h @@ -0,0 +1,37 @@ +#pragma once +#include "read_metadata.h" +#include +#include +#include +#include +#include + +namespace NKikimr::NOlap::NReader { + +class IScannerConstructor { +protected: + const TSnapshot Snapshot; + const ui64 ItemsLimit; + const bool IsReverse; + TConclusionStatus ParseProgram(const TVersionedIndex* vIndex, const NKikimrSchemeOp::EOlapProgramType programType, 
+ const TString& serializedProgram, TReadDescription& read, const IColumnResolver& columnResolver) const; +private: + virtual TConclusion> DoBuildReadMetadata(const NColumnShard::TColumnShard* self, const TReadDescription& read) const = 0; + +public: + virtual ~IScannerConstructor() = default; + + IScannerConstructor(const TSnapshot& snapshot, const ui64 itemsLimit, const bool reverse) + : Snapshot(snapshot) + , ItemsLimit(itemsLimit) + , IsReverse(reverse) + { + + } + + virtual TConclusionStatus ParseProgram(const TVersionedIndex* vIndex, const NKikimrTxDataShard::TEvKqpScan& proto, TReadDescription& read) const = 0; + virtual std::vector GetPrimaryKeyScheme(const NColumnShard::TColumnShard* self) const = 0; + TConclusion> BuildReadMetadata(const NColumnShard::TColumnShard* self, const TReadDescription& read) const; +}; + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/reader/abstract/read_context.cpp b/ydb/core/tx/columnshard/engines/reader/abstract/read_context.cpp new file mode 100644 index 000000000000..b54de1722f16 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/reader/abstract/read_context.cpp @@ -0,0 +1,9 @@ +#include "read_context.h" + +namespace NKikimr::NOlap::NReader { + +IDataReader::IDataReader(const std::shared_ptr& context) + : Context(context) { +} + +} diff --git a/ydb/core/tx/columnshard/engines/reader/read_context.h b/ydb/core/tx/columnshard/engines/reader/abstract/read_context.h similarity index 80% rename from ydb/core/tx/columnshard/engines/reader/read_context.h rename to ydb/core/tx/columnshard/engines/reader/abstract/read_context.h index bc5d5fa1ba86..fbcdab4d8622 100644 --- a/ydb/core/tx/columnshard/engines/reader/read_context.h +++ b/ydb/core/tx/columnshard/engines/reader/abstract/read_context.h @@ -1,16 +1,13 @@ #pragma once -#include "conveyor_task.h" #include "read_metadata.h" -#include -#include -#include +#include #include -#include #include -#include -#include +#include +#include +#include -namespace 
NKikimr::NOlap { +namespace NKikimr::NOlap::NReader { class TComputeShardingPolicy { private: @@ -39,20 +36,6 @@ class TComputeShardingPolicy { } }; -class TActorBasedMemoryAccesor: public TScanMemoryLimiter::IMemoryAccessor { -private: - using TBase = TScanMemoryLimiter::IMemoryAccessor; - const NActors::TActorIdentity OwnerId; -protected: - virtual void DoOnBufferReady() override; -public: - TActorBasedMemoryAccesor(const NActors::TActorIdentity& ownerId, const TString& limiterName) - : TBase(TMemoryLimitersController::GetLimiter(limiterName)) - , OwnerId(ownerId) { - - } -}; - class TReadContext { private: YDB_READONLY_DEF(std::shared_ptr, StoragesManager); @@ -62,8 +45,15 @@ class TReadContext { const TActorId ScanActorId; const TActorId ResourceSubscribeActorId; const TActorId ReadCoordinatorActorId; - const NOlap::TComputeShardingPolicy ComputeShardingPolicy; + const TComputeShardingPolicy ComputeShardingPolicy; public: + template + std::shared_ptr GetReadMetadataPtrVerifiedAs() const { + auto result = dynamic_pointer_cast(ReadMetadata); + AFL_VERIFY(result); + return result; + } + bool IsReverse() const { return ReadMetadata->IsDescSorted(); } @@ -97,7 +87,7 @@ class TReadContext { } TReadContext(const std::shared_ptr& storagesManager, const NColumnShard::TConcreteScanCounters& counters, const TReadMetadataBase::TConstPtr& readMetadata, - const TActorId& scanActorId, const TActorId& resourceSubscribeActorId, const TActorId& readCoordinatorActorId, const NOlap::TComputeShardingPolicy& computeShardingPolicy) + const TActorId& scanActorId, const TActorId& resourceSubscribeActorId, const TActorId& readCoordinatorActorId, const TComputeShardingPolicy& computeShardingPolicy) : StoragesManager(storagesManager) , Counters(counters) , ReadMetadata(readMetadata) @@ -114,15 +104,24 @@ class TReadContext { class IDataReader { protected: std::shared_ptr Context; + bool Started = false; + virtual TConclusionStatus DoStart() = 0; virtual TString DoDebugString(const bool 
verbose) const = 0; virtual void DoAbort() = 0; virtual bool DoIsFinished() const = 0; virtual std::vector DoExtractReadyResults(const int64_t maxRowsInBatch) = 0; - virtual bool DoReadNextInterval() = 0; + virtual TConclusion DoReadNextInterval() = 0; public: - IDataReader(const std::shared_ptr& context); + IDataReader(const std::shared_ptr& context); virtual ~IDataReader() = default; + TConclusionStatus Start() { + AFL_VERIFY(!Started); + Started = true; + return DoStart(); + } + virtual void OnSentDataFromInterval(const ui32 intervalIdx) const = 0; + const TReadContext& GetContext() const { return *Context; } @@ -135,7 +134,8 @@ class IDataReader { return Context->GetCounters(); } - void Abort() { + void Abort(const TString& reason) { + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "scan_aborted")("reason", reason); return DoAbort(); } @@ -166,7 +166,7 @@ class IDataReader { sb << DoDebugString(verbose); return sb; } - bool ReadNextInterval() { + [[nodiscard]] TConclusion ReadNextInterval() { return DoReadNextInterval(); } }; diff --git a/ydb/core/tx/columnshard/engines/reader/abstract/read_metadata.cpp b/ydb/core/tx/columnshard/engines/reader/abstract/read_metadata.cpp new file mode 100644 index 000000000000..e6fc29578f1c --- /dev/null +++ b/ydb/core/tx/columnshard/engines/reader/abstract/read_metadata.cpp @@ -0,0 +1,31 @@ +#include "read_metadata.h" +#include + +namespace NKikimr::NOlap::NReader { + +TDataStorageAccessor::TDataStorageAccessor(const std::unique_ptr& insertTable, + const std::unique_ptr& index) + : InsertTable(insertTable) + , Index(index) +{} + +std::shared_ptr TDataStorageAccessor::Select(const TReadDescription& readDescription) const { + if (readDescription.ReadNothing) { + return std::make_shared(); + } + return Index->Select(readDescription.PathId, + readDescription.GetSnapshot(), + readDescription.PKRangesFilter); +} + +ISnapshotSchema::TPtr TReadMetadataBase::GetLoadSchemaVerified(const TPortionInfo& portion) const { + auto 
schema = portion.GetSchema(GetIndexVersions()); + AFL_VERIFY(schema); + return schema; +} + +std::vector TDataStorageAccessor::GetCommitedBlobs(const TReadDescription& readDescription, const std::shared_ptr& pkSchema) const { + return std::move(InsertTable->Read(readDescription.PathId, readDescription.GetSnapshot(), pkSchema)); +} + +} diff --git a/ydb/core/tx/columnshard/engines/reader/abstract/read_metadata.h b/ydb/core/tx/columnshard/engines/reader/abstract/read_metadata.h new file mode 100644 index 000000000000..7d06dee3cb2a --- /dev/null +++ b/ydb/core/tx/columnshard/engines/reader/abstract/read_metadata.h @@ -0,0 +1,151 @@ +#pragma once +#include +#include +#include +#include + +namespace NKikimr::NOlap { + class TPortionInfo; +} +namespace NKikimr::NOlap::NReader { + +class TScanIteratorBase; +class TReadContext; + +class TDataStorageAccessor { +private: + const std::unique_ptr& InsertTable; + const std::unique_ptr& Index; + +public: + TDataStorageAccessor(const std::unique_ptr& insertTable, + const std::unique_ptr& index); + std::shared_ptr Select(const TReadDescription& readDescription) const; + std::vector GetCommitedBlobs(const TReadDescription& readDescription, const std::shared_ptr& pkSchema) const; +}; + +// Holds all metadata that is needed to perform read/scan +struct TReadMetadataBase { +public: + enum class ESorting { + NONE = 0 /* "not_sorted" */, + ASC /* "ascending" */, + DESC /* "descending" */, + }; +private: + const ESorting Sorting = ESorting::ASC; // Sorting inside returned batches + std::optional PKRangesFilter; + TProgramContainer Program; + std::shared_ptr IndexVersionsPointer; + TSnapshot RequestSnapshot; +protected: + std::shared_ptr ResultIndexSchema; + const TVersionedIndex& GetIndexVersions() const { + AFL_VERIFY(IndexVersionsPointer); + return *IndexVersionsPointer; + } +public: + using TConstPtr = std::shared_ptr; + + void SetPKRangesFilter(const TPKRangesFilter& value) { + Y_ABORT_UNLESS(IsSorted() && value.IsReverse() == 
IsDescSorted()); + Y_ABORT_UNLESS(!PKRangesFilter); + PKRangesFilter = value; + } + + const TPKRangesFilter& GetPKRangesFilter() const { + Y_ABORT_UNLESS(!!PKRangesFilter); + return *PKRangesFilter; + } + + ISnapshotSchema::TPtr GetResultSchema() const { + return ResultIndexSchema; + } + + ISnapshotSchema::TPtr GetLoadSchemaVerified(const TPortionInfo& porition) const; + + std::shared_ptr GetBlobSchema(const ui64 version) const { + return GetIndexVersions().GetSchema(version)->GetIndexInfo().ArrowSchema(); + } + + const TIndexInfo& GetIndexInfo(const std::optional& version = {}) const { + if (version && version < RequestSnapshot) { + return GetIndexVersions().GetSchema(*version)->GetIndexInfo(); + } + return ResultIndexSchema->GetIndexInfo(); + } + + TReadMetadataBase(const std::shared_ptr index, const ESorting sorting, const TProgramContainer& ssaProgram, const std::shared_ptr& schema, const TSnapshot& requestSnapshot) + : Sorting(sorting) + , Program(ssaProgram) + , IndexVersionsPointer(index) + , RequestSnapshot(requestSnapshot) + , ResultIndexSchema(schema) + { + } + virtual ~TReadMetadataBase() = default; + + ui64 Limit = 0; + + virtual void Dump(IOutputStream& out) const { + out << " predicate{" << (PKRangesFilter ? PKRangesFilter->DebugString() : "no_initialized") << "}" + << " " << Sorting << " sorted"; + } + + std::set GetProcessingColumnIds() const { + std::set result; + for (auto&& i : GetProgram().GetProcessingColumns()) { + result.emplace(ResultIndexSchema->GetIndexInfo().GetColumnId(i)); + } + return result; + } + bool IsAscSorted() const { return Sorting == ESorting::ASC; } + bool IsDescSorted() const { return Sorting == ESorting::DESC; } + bool IsSorted() const { return IsAscSorted() || IsDescSorted(); } + + virtual std::unique_ptr StartScan(const std::shared_ptr& readContext) const = 0; + virtual std::vector GetKeyYqlSchema() const = 0; + + // TODO: can this only be done for base class? 
+ friend IOutputStream& operator << (IOutputStream& out, const TReadMetadataBase& meta) { + meta.Dump(out); + return out; + } + + const TProgramContainer& GetProgram() const { + return Program; + } + + const TSnapshot& GetRequestSnapshot() const { + return RequestSnapshot; + } + + std::shared_ptr GetReplaceKey() const { + return ResultIndexSchema->GetIndexInfo().GetReplaceKey(); + } + + std::optional GetColumnNameDef(const ui32 columnId) const { + if (!ResultIndexSchema) { + return {}; + } + auto f = ResultIndexSchema->GetFieldByColumnIdOptional(columnId); + if (!f) { + return {}; + } + return f->name(); + } + + std::optional GetEntityName(const ui32 entityId) const { + if (!ResultIndexSchema) { + return {}; + } + auto result = ResultIndexSchema->GetIndexInfo().GetColumnNameOptional(entityId); + if (!!result) { + return result; + } + return ResultIndexSchema->GetIndexInfo().GetIndexNameOptional(entityId); + } + +}; + +} diff --git a/ydb/core/tx/columnshard/engines/reader/abstract/ya.make b/ydb/core/tx/columnshard/engines/reader/abstract/ya.make new file mode 100644 index 000000000000..9ad540de1170 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/reader/abstract/ya.make @@ -0,0 +1,20 @@ +LIBRARY() + +SRCS( + abstract.cpp + read_metadata.cpp + constructor.cpp + read_context.cpp +) + +PEERDIR( + ydb/core/tx/columnshard/engines/scheme/versions + ydb/core/tx/columnshard/engines/insert_table + ydb/core/tx/program + ydb/core/protos + ydb/core/tx/columnshard/data_sharing/protos +) + +GENERATE_ENUM_SERIALIZATION(read_metadata.h) + +END() diff --git a/ydb/core/tx/columnshard/engines/reader/actor/actor.cpp b/ydb/core/tx/columnshard/engines/reader/actor/actor.cpp new file mode 100644 index 000000000000..878153f52baf --- /dev/null +++ b/ydb/core/tx/columnshard/engines/reader/actor/actor.cpp @@ -0,0 +1,415 @@ +#include "actor.h" +#include +#include +#include +#include + +namespace NKikimr::NOlap::NReader { +constexpr i64 DEFAULT_READ_AHEAD_BYTES = (i64)2 * 1024 * 1024 * 1024; 
+constexpr TDuration SCAN_HARD_TIMEOUT = TDuration::Minutes(10); +constexpr TDuration SCAN_HARD_TIMEOUT_GAP = TDuration::Seconds(5); + +namespace { +class TInFlightGuard: NNonCopyable::TNonCopyable { +private: + static inline TAtomicCounter InFlightGlobal = 0; + i64 InFlightGuarded = 0; +public: + ~TInFlightGuard() { + Return(InFlightGuarded); + } + + bool CanTake() { + return InFlightGlobal.Val() < DEFAULT_READ_AHEAD_BYTES || !InFlightGuarded; + } + + void Take(const ui64 bytes) { + InFlightGlobal.Add(bytes); + InFlightGuarded += bytes; + } + + void Return(const ui64 bytes) { + Y_ABORT_UNLESS(InFlightGlobal.Sub(bytes) >= 0); + InFlightGuarded -= bytes; + Y_ABORT_UNLESS(InFlightGuarded >= 0); + } +}; + +} + +void TColumnShardScan::PassAway() { + Send(ResourceSubscribeActorId, new TEvents::TEvPoisonPill); + Send(ReadCoordinatorActorId, new TEvents::TEvPoisonPill); + IActor::PassAway(); +} + +TColumnShardScan::TColumnShardScan(const TActorId& columnShardActorId, const TActorId& scanComputeActorId, const std::shared_ptr& storagesManager, + const TComputeShardingPolicy& computeShardingPolicy, ui32 scanId, ui64 txId, ui32 scanGen, ui64 requestCookie, + ui64 tabletId, TDuration timeout, const TReadMetadataBase::TConstPtr& readMetadataRange, + NKikimrDataEvents::EDataFormat dataFormat, const NColumnShard::TScanCounters& scanCountersPool) + : StoragesManager(storagesManager) + , ColumnShardActorId(columnShardActorId) + , ScanComputeActorId(scanComputeActorId) + , BlobCacheActorId(NBlobCache::MakeBlobCacheServiceId()) + , ScanId(scanId) + , TxId(txId) + , ScanGen(scanGen) + , RequestCookie(requestCookie) + , DataFormat(dataFormat) + , TabletId(tabletId) + , ReadMetadataRange(readMetadataRange) + , Deadline(TInstant::Now() + (timeout ? 
timeout + SCAN_HARD_TIMEOUT_GAP : SCAN_HARD_TIMEOUT)) + , ScanCountersPool(scanCountersPool) + , Stats(NTracing::TTraceClient::GetLocalClient("SHARD", ::ToString(TabletId)/*, "SCAN_TXID:" + ::ToString(TxId)*/)) + , ComputeShardingPolicy(computeShardingPolicy) +{ + AFL_VERIFY(ReadMetadataRange); + KeyYqlSchema = ReadMetadataRange->GetKeyYqlSchema(); +} + +void TColumnShardScan::Bootstrap(const TActorContext& ctx) { + TLogContextGuard gLogging(NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD_SCAN) + ("SelfId", SelfId())("TabletId", TabletId)("ScanId", ScanId)("TxId", TxId)("ScanGen", ScanGen) + ); + auto g = Stats->MakeGuard("bootstrap"); + ScanActorId = ctx.SelfID; + Schedule(Deadline, new TEvents::TEvWakeup); + + Y_ABORT_UNLESS(!ScanIterator); + ResourceSubscribeActorId = ctx.Register(new NResourceBroker::NSubscribe::TActor(TabletId, SelfId())); + ReadCoordinatorActorId = ctx.Register(new NBlobOperations::NRead::TReadCoordinatorActor(TabletId, SelfId())); + + std::shared_ptr context = std::make_shared(StoragesManager, ScanCountersPool, + ReadMetadataRange, SelfId(), ResourceSubscribeActorId, ReadCoordinatorActorId, ComputeShardingPolicy); + ScanIterator = ReadMetadataRange->StartScan(context); + auto startResult = ScanIterator->Start(); + StartInstant = TMonotonic::Now(); + if (!startResult) { + ACFL_ERROR("event", "BootstrapError")("error", startResult.GetErrorMessage()); + SendScanError("scanner_start_error:" + startResult.GetErrorMessage()); + Finish(NColumnShard::TScanCounters::EStatusFinish::ProblemOnStart); + } else { + + // propagate self actor id // TODO: FlagSubscribeOnSession ? 
+ Send(ScanComputeActorId, new NKqp::TEvKqpCompute::TEvScanInitActor(ScanId, ctx.SelfID, ScanGen, TabletId), IEventHandle::FlagTrackDelivery); + + Become(&TColumnShardScan::StateScan); + ContinueProcessing(); + } +} + +void TColumnShardScan::HandleScan(NConveyor::TEvExecution::TEvTaskProcessedResult::TPtr& ev) { + --InFlightReads; + auto g = Stats->MakeGuard("task_result"); + if (ev->Get()->GetErrorMessage()) { + ACFL_ERROR("event", "TEvTaskProcessedResult")("error", ev->Get()->GetErrorMessage()); + SendScanError("task_error:" + ev->Get()->GetErrorMessage()); + return Finish(NColumnShard::TScanCounters::EStatusFinish::ConveyorInternalError); // the scan is over once Finish() has run, so ContinueProcessing() must not be reached + } else { + ACFL_DEBUG("event", "TEvTaskProcessedResult"); + auto t = static_pointer_cast(ev->Get()->GetResult()); + Y_DEBUG_ABORT_UNLESS(dynamic_pointer_cast(ev->Get()->GetResult())); + if (!ScanIterator->Finished()) { + ScanIterator->Apply(t); + } + } + ContinueProcessing(); +} + +void TColumnShardScan::HandleScan(NKqp::TEvKqpCompute::TEvScanDataAck::TPtr& ev) { + auto g = Stats->MakeGuard("ack"); + Y_ABORT_UNLESS(!AckReceivedInstant); + AckReceivedInstant = TMonotonic::Now(); + + Y_ABORT_UNLESS(ev->Get()->Generation == ScanGen); + + ChunksLimiter = TChunksLimiter(ev->Get()->FreeSpace, ev->Get()->MaxChunksCount); + Y_ABORT_UNLESS(ev->Get()->MaxChunksCount == 1); + ACFL_DEBUG("event", "TEvScanDataAck")("info", ChunksLimiter.DebugString()); + if (ScanIterator) { + if (!!ScanIterator->GetAvailableResultsCount() && !*ScanIterator->GetAvailableResultsCount()) { + ScanCountersPool.OnEmptyAck(); + } else { + ScanCountersPool.OnNotEmptyAck(); + } + } + ContinueProcessing(); +} + +void TColumnShardScan::HandleScan(NKqp::TEvKqp::TEvAbortExecution::TPtr& ev) noexcept { + auto& msg = ev->Get()->Record; + TString reason = ev->Get()->GetIssues().ToOneLineString(); // non-const so the later std::move into AbortReason really moves instead of copying + + auto prio = msg.GetStatusCode() == NYql::NDqProto::StatusIds::SUCCESS ?
NActors::NLog::PRI_DEBUG : NActors::NLog::PRI_WARN; + LOG_LOG_S(*TlsActivationContext, prio, NKikimrServices::TX_COLUMNSHARD_SCAN, + "Scan " << ScanActorId << " got AbortExecution" + << " txId: " << TxId << " scanId: " << ScanId << " gen: " << ScanGen << " tablet: " << TabletId + << " code: " << NYql::NDqProto::StatusIds_StatusCode_Name(msg.GetStatusCode()) + << " reason: " << reason); + + AbortReason = std::move(reason); + Finish(NColumnShard::TScanCounters::EStatusFinish::ExternalAbort); +} + +void TColumnShardScan::HandleScan(TEvents::TEvUndelivered::TPtr& ev) { + ui32 eventType = ev->Get()->SourceType; + switch (eventType) { + case NKqp::TEvKqpCompute::TEvScanInitActor::EventType: + AbortReason = "init failed"; + break; + case NKqp::TEvKqpCompute::TEvScanData::EventType: + AbortReason = "failed to send data batch"; + break; + } + + LOG_WARN_S(*TlsActivationContext, NKikimrServices::TX_COLUMNSHARD_SCAN, + "Scan " << ScanActorId << " undelivered event: " << eventType + << " txId: " << TxId << " scanId: " << ScanId << " gen: " << ScanGen << " tablet: " << TabletId + << " reason: " << ev->Get()->Reason + << " description: " << AbortReason); + + Finish(NColumnShard::TScanCounters::EStatusFinish::UndeliveredEvent); +} + +void TColumnShardScan::HandleScan(TEvents::TEvWakeup::TPtr& /*ev*/) { + LOG_ERROR_S(*TlsActivationContext, NKikimrServices::TX_COLUMNSHARD_SCAN, + "Scan " << ScanActorId << " guard execution timeout" + << " txId: " << TxId << " scanId: " << ScanId << " gen: " << ScanGen << " tablet: " << TabletId); + + Finish(NColumnShard::TScanCounters::EStatusFinish::Deadline); +} + +bool TColumnShardScan::ProduceResults() noexcept { + auto g = Stats->MakeGuard("ProduceResults"); + TLogContextGuard gLogging(NActors::TLogContextBuilder::Build()("method", "produce result")); + + ACFL_DEBUG("stage", "start")("iterator", ScanIterator->DebugString()); + Y_ABORT_UNLESS(!Finished); + Y_ABORT_UNLESS(ScanIterator); + + if (ScanIterator->Finished()) { + ACFL_DEBUG("stage", 
"scan iterator is finished")("iterator", ScanIterator->DebugString()); + return false; + } + + if (!ChunksLimiter.HasMore()) { + ScanIterator->PrepareResults(); + ACFL_DEBUG("stage", "limit exhausted")("limit", ChunksLimiter.DebugString()); + return false; + } + + auto resultConclusion = ScanIterator->GetBatch(); + if (resultConclusion.IsFail()) { + ACFL_ERROR("stage", "got error")("iterator", ScanIterator->DebugString())("message", resultConclusion.GetErrorMessage()); + SendScanError(resultConclusion.GetErrorMessage()); + + ScanIterator.reset(); + Finish(NColumnShard::TScanCounters::EStatusFinish::IteratorInternalErrorResult); + return false; + } + + std::optional resultOpt = resultConclusion.DetachResult(); + if (!resultOpt) { + ACFL_DEBUG("stage", "no data is ready yet")("iterator", ScanIterator->DebugString()); + return false; + } + + auto& result = *resultOpt; + + if (!result.GetRecordsCount()) { + ACFL_DEBUG("stage", "got empty batch")("iterator", ScanIterator->DebugString()); + return true; + } + + auto& shardedBatch = result.GetShardedBatch(); + auto batch = shardedBatch.GetRecordBatch(); + int numRows = batch->num_rows(); + int numColumns = batch->num_columns(); + ACFL_DEBUG("stage", "ready result")("iterator", ScanIterator->DebugString())("columns", numColumns)("rows", result.GetRecordsCount()); + + AFL_VERIFY(DataFormat == NKikimrDataEvents::FORMAT_ARROW); + { + MakeResult(0); + if (shardedBatch.IsSharded()) { + AFL_INFO(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "compute_sharding_success")("count", shardedBatch.GetSplittedByShards().size())("info", ComputeShardingPolicy.DebugString()); + Result->SplittedBatches = shardedBatch.GetSplittedByShards(); + } else { + if (ComputeShardingPolicy.IsEnabled()) { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "compute_sharding_problems")("info", ComputeShardingPolicy.DebugString()); + } + } + TMemoryProfileGuard mGuard("SCAN_PROFILE::RESULT::TO_KQP", 
IS_DEBUG_LOG_ENABLED(NKikimrServices::TX_COLUMNSHARD_SCAN_MEMORY)); + Result->ArrowBatch = shardedBatch.GetRecordBatch(); + Rows += batch->num_rows(); + Bytes += NArrow::GetTableDataSize(Result->ArrowBatch); + ACFL_DEBUG("stage", "data_format")("batch_size", NArrow::GetTableDataSize(Result->ArrowBatch))("num_rows", numRows)("batch_columns", JoinSeq(",", batch->schema()->field_names())); + } + if (CurrentLastReadKey) { + NArrow::NMerger::TSortableBatchPosition pNew(result.GetLastReadKey(), 0, result.GetLastReadKey()->schema()->field_names(), {}, ReadMetadataRange->IsDescSorted()); + NArrow::NMerger::TSortableBatchPosition pOld(CurrentLastReadKey, 0, CurrentLastReadKey->schema()->field_names(), {}, ReadMetadataRange->IsDescSorted()); + AFL_VERIFY(pOld < pNew)("old", pOld.DebugJson().GetStringRobust())("new", pNew.DebugJson().GetStringRobust()); + } + CurrentLastReadKey = result.GetLastReadKey(); + + Result->LastKey = ConvertLastKey(result.GetLastReadKey()); + SendResult(false, false); + ScanIterator->OnSentDataFromInterval(result.GetNotFinishedIntervalIdx()); + ACFL_DEBUG("stage", "finished")("iterator", ScanIterator->DebugString()); + return true; +} + +void TColumnShardScan::ContinueProcessing() { + if (!ScanIterator) { + ACFL_DEBUG("event", "ContinueProcessing")("stage", "iterator is not initialized"); + return; + } + // Send new results if there is available capacity + while (ScanIterator && ProduceResults()) { + } + + if (ScanIterator) { + // Switch to the next range if the current one is finished + if (ScanIterator->Finished()) { + if (ChunksLimiter.HasMore()) { + auto g = Stats->MakeGuard("Finish"); + MakeResult(); + SendResult(false, true); + ScanIterator.reset(); + Finish(NColumnShard::TScanCounters::EStatusFinish::Success); + } + } else { + while (true) { + TConclusion hasMoreData = ScanIterator->ReadNextInterval(); + if (hasMoreData.IsFail()) { + ACFL_ERROR("event", "ContinueProcessing")("error", hasMoreData.GetErrorMessage()); + ScanIterator.reset(); + 
SendScanError("iterator_error:" + hasMoreData.GetErrorMessage()); + return Finish(NColumnShard::TScanCounters::EStatusFinish::IteratorInternalErrorScan); + } else if (!*hasMoreData) { + break; + } + } + } + } + AFL_VERIFY(!ScanIterator || !ChunksLimiter.HasMore() || InFlightReads || ScanCountersPool.InWaiting())("scan_actor_id", ScanActorId)("tx_id", TxId)("scan_id", ScanId)("gen", ScanGen)("tablet", TabletId) + ("debug", ScanIterator->DebugString()); +} + +void TColumnShardScan::MakeResult(size_t reserveRows /*= 0*/) { + if (!Finished && !Result) { + Result = MakeHolder(ScanId, ScanGen); + if (reserveRows) { + Y_ABORT_UNLESS(DataFormat != NKikimrDataEvents::FORMAT_ARROW); + Result->Rows.reserve(reserveRows); + } + } +} + +void TColumnShardScan::AddRow(const TConstArrayRef& row) { + Result->Rows.emplace_back(TOwnedCellVec::Make(row)); + ++Rows; + + // NOTE: Some per-row overhead to deal with the case when no columns were requested + Bytes += std::max((ui64)8, (ui64)Result->Rows.back().DataSize()); +} + +NKikimr::TOwnedCellVec TColumnShardScan::ConvertLastKey(const std::shared_ptr& lastReadKey) { + Y_ABORT_UNLESS(lastReadKey, "last key must be passed"); + + struct TSingeRowWriter: public IRowWriter { + TOwnedCellVec Row; + bool Done = false; + void AddRow(const TConstArrayRef& row) override { + Y_ABORT_UNLESS(!Done); + Row = TOwnedCellVec::Make(row); + Done = true; + } + } singleRowWriter; + NArrow::TArrowToYdbConverter converter(KeyYqlSchema, singleRowWriter); + TString errStr; + bool ok = converter.Process(*lastReadKey, errStr); + Y_ABORT_UNLESS(ok, "%s", errStr.c_str()); + + Y_ABORT_UNLESS(singleRowWriter.Done); + return singleRowWriter.Row; +} + +bool TColumnShardScan::SendResult(bool pageFault, bool lastBatch) { + if (Finished) { + return true; + } + + Result->PageFault = pageFault; + Result->PageFaults = PageFaults; + Result->Finished = lastBatch; + if (ScanIterator) { + Result->AvailablePacks = ScanIterator->GetAvailableResultsCount(); + } + TDuration 
totalElapsedTime = TDuration::Seconds(GetElapsedTicksAsSeconds()); + // Result->TotalTime = totalElapsedTime - LastReportedElapsedTime; + // TODO: Result->CpuTime = ... + LastReportedElapsedTime = totalElapsedTime; + + PageFaults = 0; + + LOG_DEBUG_S(*TlsActivationContext, NKikimrServices::TX_COLUMNSHARD_SCAN, + "Scan " << ScanActorId << " send ScanData to " << ScanComputeActorId + << " txId: " << TxId << " scanId: " << ScanId << " gen: " << ScanGen << " tablet: " << TabletId + << " bytes: " << Bytes << " rows: " << Rows << " page faults: " << Result->PageFaults + << " finished: " << Result->Finished << " pageFault: " << Result->PageFault + << " arrow schema:\n" << (Result->ArrowBatch ? Result->ArrowBatch->schema()->ToString() : "")); + + Finished = Result->Finished; + if (Finished) { + ALS_INFO(NKikimrServices::TX_COLUMNSHARD_SCAN) << + "Scanner finished " << ScanActorId << " and sent to " << ScanComputeActorId + << " packs: " << PacksSum << " txId: " << TxId << " scanId: " << ScanId << " gen: " << ScanGen << " tablet: " << TabletId + << " bytes: " << Bytes << "/" << BytesSum << " rows: " << Rows << "/" << RowsSum << " page faults: " << Result->PageFaults + << " finished: " << Result->Finished << " pageFault: " << Result->PageFault + << " stats:" << Stats->ToJson() << ";iterator:" << (ScanIterator ? ScanIterator->DebugString(false) : "NO"); + Result->StatsOnFinished = std::make_shared(ScanIterator->GetStats()); + } else { + Y_ABORT_UNLESS(ChunksLimiter.Take(Bytes)); + Result->RequestedBytesLimitReached = !ChunksLimiter.HasMore(); + Y_ABORT_UNLESS(AckReceivedInstant); + ScanCountersPool.AckWaitingInfo(TMonotonic::Now() - *AckReceivedInstant); + } + AckReceivedInstant.reset(); + + Send(ScanComputeActorId, Result.Release(), IEventHandle::FlagTrackDelivery); // TODO: FlagSubscribeOnSession ? 
+ + ReportStats(); + + return true; +} + +void TColumnShardScan::SendScanError(const TString& reason) { + AFL_VERIFY(reason); + const TString msg = TStringBuilder() << "Scan failed at tablet " << TabletId << ", reason: " + reason; + + auto ev = MakeHolder(ScanGen, TabletId); + ev->Record.SetStatus(Ydb::StatusIds::GENERIC_ERROR); + auto issue = NYql::YqlIssue({}, NYql::TIssuesIds::KIKIMR_RESULT_UNAVAILABLE, msg); + NYql::IssueToMessage(issue, ev->Record.MutableIssues()->Add()); + + Send(ScanComputeActorId, ev.Release()); +} + +void TColumnShardScan::Finish(const NColumnShard::TScanCounters::EStatusFinish status) { + LOG_DEBUG_S(*TlsActivationContext, NKikimrServices::TX_COLUMNSHARD_SCAN, + "Scan " << ScanActorId << " finished for tablet " << TabletId); + + Send(ColumnShardActorId, new NColumnShard::TEvPrivate::TEvReadFinished(RequestCookie, TxId)); + AFL_VERIFY(StartInstant); + ScanCountersPool.OnScanDuration(status, TMonotonic::Now() - *StartInstant); + ReportStats(); + PassAway(); +} + +void TColumnShardScan::ReportStats() { + Send(ColumnShardActorId, new NColumnShard::TEvPrivate::TEvScanStats(Rows, Bytes)); + Rows = 0; + Bytes = 0; +} + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/reader/actor/actor.h b/ydb/core/tx/columnshard/engines/reader/actor/actor.h new file mode 100644 index 000000000000..33134f85bcab --- /dev/null +++ b/ydb/core/tx/columnshard/engines/reader/actor/actor.h @@ -0,0 +1,184 @@ +#pragma once +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include + +namespace NKikimr::NOlap::NReader { + +class TColumnShardScan: public TActorBootstrapped, NArrow::IRowWriter { +private: + TActorId ResourceSubscribeActorId; + TActorId ReadCoordinatorActorId; + const std::shared_ptr StoragesManager; + std::optional StartInstant; +public: + static constexpr auto ActorActivityType() { + return NKikimrServices::TActivity::KQP_OLAP_SCAN; + } + +public: + virtual void 
PassAway() override; + + TColumnShardScan(const TActorId& columnShardActorId, const TActorId& scanComputeActorId, + const std::shared_ptr& storagesManager, const TComputeShardingPolicy& computeShardingPolicy, + ui32 scanId, ui64 txId, ui32 scanGen, ui64 requestCookie, + ui64 tabletId, TDuration timeout, const TReadMetadataBase::TConstPtr& readMetadataRange, + NKikimrDataEvents::EDataFormat dataFormat, const NColumnShard::TScanCounters& scanCountersPool); + + void Bootstrap(const TActorContext& ctx); + +private: + STATEFN(StateScan) { + auto g = Stats->MakeGuard("processing"); + TLogContextGuard gLogging(NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD_SCAN) + ("SelfId", SelfId())("TabletId", TabletId)("ScanId", ScanId)("TxId", TxId)("ScanGen", ScanGen) + ); + switch (ev->GetTypeRewrite()) { + hFunc(NKqp::TEvKqpCompute::TEvScanDataAck, HandleScan); + hFunc(NKqp::TEvKqp::TEvAbortExecution, HandleScan); + hFunc(TEvents::TEvUndelivered, HandleScan); + hFunc(TEvents::TEvWakeup, HandleScan); + hFunc(NConveyor::TEvExecution::TEvTaskProcessedResult, HandleScan); + default: + AFL_VERIFY(false)("unexpected_event", ev->GetTypeName()); + } + } + + void HandleScan(NConveyor::TEvExecution::TEvTaskProcessedResult::TPtr& ev); + + void HandleScan(NKqp::TEvKqpCompute::TEvScanDataAck::TPtr& ev); + + // Returns true if it was able to produce new batch + bool ProduceResults() noexcept; + + void ContinueProcessing(); + + void HandleScan(NKqp::TEvKqp::TEvAbortExecution::TPtr& ev) noexcept; + + void HandleScan(TEvents::TEvUndelivered::TPtr& ev); + + void HandleScan(TEvents::TEvWakeup::TPtr& /*ev*/); + +private: + void MakeResult(size_t reserveRows = 0); + + void AddRow(const TConstArrayRef& row) override; + + TOwnedCellVec ConvertLastKey(const std::shared_ptr& lastReadKey); + + class TScanStatsOwner: public NKqp::TEvKqpCompute::IShardScanStats { + private: + YDB_READONLY_DEF(TReadStats, Stats); + public: + TScanStatsOwner(const TReadStats& stats) + : Stats(stats) { + + } 
+ + virtual THashMap GetMetrics() const override { + THashMap result; + result["compacted_bytes"] = Stats.CompactedPortionsBytes; + result["inserted_bytes"] = Stats.InsertedPortionsBytes; + result["committed_bytes"] = Stats.CommittedPortionsBytes; + return result; + } + }; + + bool SendResult(bool pageFault, bool lastBatch); + + void SendScanError(const TString& reason); + + void Finish(const NColumnShard::TScanCounters::EStatusFinish status); + + void ReportStats(); + +private: + const TActorId ColumnShardActorId; + const TActorId ReadBlobsActorId; + const TActorId ScanComputeActorId; + std::optional AckReceivedInstant; + TActorId ScanActorId; + TActorId BlobCacheActorId; + const ui32 ScanId; + const ui64 TxId; + const ui32 ScanGen; + const ui64 RequestCookie; + const NKikimrDataEvents::EDataFormat DataFormat; + const ui64 TabletId; + + TReadMetadataBase::TConstPtr ReadMetadataRange; + std::unique_ptr ScanIterator; + + std::vector> KeyYqlSchema; + const TSerializedTableRange TableRange; + const TSmallVec SkipNullKeys; + const TInstant Deadline; + NColumnShard::TConcreteScanCounters ScanCountersPool; + + TMaybe AbortReason; + + TChunksLimiter ChunksLimiter; + THolder Result; + std::shared_ptr CurrentLastReadKey; + i64 InFlightReads = 0; + bool Finished = false; + + class TBlobStats { + private: + ui64 PartsCount = 0; + ui64 Bytes = 0; + TDuration ReadingDurationSum; + TDuration ReadingDurationMax; + NMonitoring::THistogramPtr BlobDurationsCounter; + NMonitoring::THistogramPtr ByteDurationsCounter; + public: + TBlobStats(const NMonitoring::THistogramPtr blobDurationsCounter, const NMonitoring::THistogramPtr byteDurationsCounter) + : BlobDurationsCounter(blobDurationsCounter) + , ByteDurationsCounter(byteDurationsCounter) { + + } + void Received(const TBlobRange& br, const TDuration d) { + ReadingDurationSum += d; + ReadingDurationMax = Max(ReadingDurationMax, d); + ++PartsCount; + Bytes += br.Size; + BlobDurationsCounter->Collect(d.MilliSeconds()); + 
ByteDurationsCounter->Collect((i64)d.MilliSeconds(), br.Size); + } + TString DebugString() const { + TStringBuilder sb; + if (PartsCount) { + sb << "p_count=" << PartsCount << ";"; + sb << "bytes=" << Bytes << ";"; + sb << "d_avg=" << ReadingDurationSum / PartsCount << ";"; + sb << "d_max=" << ReadingDurationMax << ";"; + } else { + sb << "NO_BLOBS;"; + } + return sb; + } + }; + + NTracing::TTraceClientGuard Stats; + const TComputeShardingPolicy ComputeShardingPolicy; + ui64 Rows = 0; + ui64 BytesSum = 0; + ui64 RowsSum = 0; + ui64 PacksSum = 0; + ui64 Bytes = 0; + ui32 PageFaults = 0; + TDuration LastReportedElapsedTime; +}; + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/reader/actor/ya.make b/ydb/core/tx/columnshard/engines/reader/actor/ya.make new file mode 100644 index 000000000000..c0b913a0c123 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/reader/actor/ya.make @@ -0,0 +1,13 @@ +LIBRARY() + +SRCS( + actor.cpp +) + +PEERDIR( + ydb/core/tx/columnshard/engines/reader/abstract + ydb/core/kqp/compute_actor + ydb/library/yql/core/issue +) + +END() diff --git a/ydb/core/tx/columnshard/engines/reader/common/conveyor_task.cpp b/ydb/core/tx/columnshard/engines/reader/common/conveyor_task.cpp new file mode 100644 index 000000000000..26f14784a032 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/reader/common/conveyor_task.cpp @@ -0,0 +1,9 @@ +#include "conveyor_task.h" + +namespace NKikimr::NOlap::NReader { + +bool IDataTasksProcessor::ITask::Apply(IDataReader& indexedDataRead) const { + return DoApply(indexedDataRead); +} + +} diff --git a/ydb/core/tx/columnshard/engines/reader/conveyor_task.h b/ydb/core/tx/columnshard/engines/reader/common/conveyor_task.h similarity index 70% rename from ydb/core/tx/columnshard/engines/reader/conveyor_task.h rename to ydb/core/tx/columnshard/engines/reader/common/conveyor_task.h index ef535257cfab..f29b19ecee59 100644 --- a/ydb/core/tx/columnshard/engines/reader/conveyor_task.h +++ 
b/ydb/core/tx/columnshard/engines/reader/common/conveyor_task.h @@ -2,11 +2,9 @@ #include #include -namespace NKikimr::NOlap { -class IDataReader; -} +namespace NKikimr::NOlap::NReader { -namespace NKikimr::NColumnShard { +class IDataReader; class IDataTasksProcessor { public: @@ -14,7 +12,7 @@ class IDataTasksProcessor { private: using TBase = NConveyor::ITask; protected: - virtual bool DoApply(NOlap::IDataReader& indexedDataRead) const = 0; + virtual bool DoApply(IDataReader& indexedDataRead) const = 0; public: ITask(const std::optional ownerId = {}) : TBase(ownerId) { @@ -23,7 +21,7 @@ class IDataTasksProcessor { using TPtr = std::shared_ptr; virtual ~ITask() = default; - bool Apply(NOlap::IDataReader& indexedDataRead) const; + bool Apply(IDataReader& indexedDataRead) const; }; }; diff --git a/ydb/core/tx/columnshard/engines/reader/description.cpp b/ydb/core/tx/columnshard/engines/reader/common/description.cpp similarity index 100% rename from ydb/core/tx/columnshard/engines/reader/description.cpp rename to ydb/core/tx/columnshard/engines/reader/common/description.cpp diff --git a/ydb/core/tx/columnshard/engines/reader/description.h b/ydb/core/tx/columnshard/engines/reader/common/description.h similarity index 93% rename from ydb/core/tx/columnshard/engines/reader/description.h rename to ydb/core/tx/columnshard/engines/reader/common/description.h index f05943bffa07..704b4bd101a9 100644 --- a/ydb/core/tx/columnshard/engines/reader/description.h +++ b/ydb/core/tx/columnshard/engines/reader/common/description.h @@ -1,8 +1,10 @@ #pragma once -#include +#include #include +#include + #include -namespace NKikimr::NOlap { +namespace NKikimr::NOlap::NReader { // Describes read/scan request struct TReadDescription { diff --git a/ydb/core/tx/columnshard/engines/reader/queue.cpp b/ydb/core/tx/columnshard/engines/reader/common/queue.cpp similarity index 100% rename from ydb/core/tx/columnshard/engines/reader/queue.cpp rename to 
ydb/core/tx/columnshard/engines/reader/common/queue.cpp diff --git a/ydb/core/tx/columnshard/engines/reader/queue.h b/ydb/core/tx/columnshard/engines/reader/common/queue.h similarity index 100% rename from ydb/core/tx/columnshard/engines/reader/queue.h rename to ydb/core/tx/columnshard/engines/reader/common/queue.h diff --git a/ydb/core/tx/columnshard/engines/reader/common/result.cpp b/ydb/core/tx/columnshard/engines/reader/common/result.cpp new file mode 100644 index 000000000000..484165c67b54 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/reader/common/result.cpp @@ -0,0 +1,51 @@ +#include "result.h" + +namespace NKikimr::NOlap::NReader { + +class TCurrentBatch { +private: + std::vector Results; + ui64 RecordsCount = 0; +public: + ui64 GetRecordsCount() const { + return RecordsCount; + } + + void AddChunk(TPartialReadResult&& res) { + RecordsCount += res.GetRecordsCount(); + Results.emplace_back(std::move(res)); + } + + void FillResult(std::vector& result) { // non-const on purpose: a const method would turn the std::move below into a copy + if (Results.empty()) { + return; + } + for (auto&& i : Results) { + result.emplace_back(std::move(i)); // hands the chunk over to the caller; the stored entries are moved-from afterwards, so call this at most once + } + } +}; + +std::vector TPartialReadResult::SplitResults(std::vector&& resultsExt, const ui32 maxRecordsInResult) { + std::vector resultBatches; + TCurrentBatch currentBatch; + for (auto&& i : resultsExt) { + AFL_VERIFY(i.GetRecordsCount()); + currentBatch.AddChunk(std::move(i)); + if (currentBatch.GetRecordsCount() >= maxRecordsInResult) { + resultBatches.emplace_back(std::move(currentBatch)); + currentBatch = TCurrentBatch(); + } + } + if (currentBatch.GetRecordsCount()) { + resultBatches.emplace_back(std::move(currentBatch)); + } + + std::vector result; + for (auto&& i : resultBatches) { + i.FillResult(result); + } + return result; +} + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/reader/common/result.h b/ydb/core/tx/columnshard/engines/reader/common/result.h new file mode 100644 index 000000000000..2c3f698bf7d7 --- /dev/null +++
b/ydb/core/tx/columnshard/engines/reader/common/result.h @@ -0,0 +1,87 @@ +#pragma once +#include +#include +#include +#include + +#include +namespace NKikimr::NOlap::NReader { + +// Represents a batch of rows produced by ASC or DESC scan with applied filters and partial aggregation +class TPartialReadResult { +private: + YDB_READONLY_DEF(std::vector>, ResourcesGuards); + NArrow::TShardedRecordBatch ResultBatch; + + // This 1-row batch contains the last key that was read while producing the ResultBatch. + // NOTE: it might be different from the Key of last row in ResultBatch in case of filtering/aggregation/limit + std::shared_ptr LastReadKey; + YDB_READONLY_DEF(std::optional, NotFinishedIntervalIdx); + +public: + void Cut(const ui32 limit) { + ResultBatch.Cut(limit); + } + + const arrow::Table& GetResultBatch() const { + return *ResultBatch.GetRecordBatch(); + } + + const std::shared_ptr& GetResultBatchPtrVerified() const { + AFL_VERIFY(ResultBatch.GetRecordBatch()); + return ResultBatch.GetRecordBatch(); + } + + const std::shared_ptr& GetResourcesGuardOnly() const { + AFL_VERIFY(ResourcesGuards.size() == 1); + AFL_VERIFY(!!ResourcesGuards.front()); + return ResourcesGuards.front(); + } + + ui64 GetMemorySize() const { + return ResultBatch.GetMemorySize(); + } + + ui64 GetRecordsCount() const { + return ResultBatch.GetRecordsCount(); + } + + static std::vector SplitResults(std::vector&& resultsExt, const ui32 maxRecordsInResult); + + const NArrow::TShardedRecordBatch& GetShardedBatch() const { + return ResultBatch; + } + + const std::shared_ptr& GetLastReadKey() const { + return LastReadKey; + } + + explicit TPartialReadResult( + const std::vector>& resourcesGuards, + const NArrow::TShardedRecordBatch& batch, std::shared_ptr lastKey, const std::optional notFinishedIntervalIdx) + : ResourcesGuards(resourcesGuards) + , ResultBatch(batch) + , LastReadKey(lastKey) + , NotFinishedIntervalIdx(notFinishedIntervalIdx) + { + for (auto&& i : ResourcesGuards) { +
AFL_VERIFY(i); + } + Y_ABORT_UNLESS(ResultBatch.GetRecordsCount()); + Y_ABORT_UNLESS(LastReadKey); + Y_ABORT_UNLESS(LastReadKey->num_rows() == 1); + } + + explicit TPartialReadResult( + const std::shared_ptr& resourcesGuards, + const NArrow::TShardedRecordBatch& batch, std::shared_ptr lastKey, const std::optional notFinishedIntervalIdx) + : TPartialReadResult(std::vector>({resourcesGuards}), batch, lastKey, notFinishedIntervalIdx) { + AFL_VERIFY(resourcesGuards); + } + + explicit TPartialReadResult(const NArrow::TShardedRecordBatch& batch, std::shared_ptr lastKey, const std::optional notFinishedIntervalIdx) + : TPartialReadResult(std::vector>(), batch, lastKey, notFinishedIntervalIdx) { + } +}; + +} diff --git a/ydb/core/tx/columnshard/engines/reader/common/stats.cpp b/ydb/core/tx/columnshard/engines/reader/common/stats.cpp new file mode 100644 index 000000000000..36be742dd0f6 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/reader/common/stats.cpp @@ -0,0 +1,28 @@ +#include "stats.h" +#include +#include + +namespace NKikimr::NOlap::NReader { + +void TReadStats::PrintToLog() { + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN) + ("event", "statistic") + ("begin", BeginTimestamp) + ("index_granules", IndexGranules) + ("index_portions", IndexPortions) + ("index_batches", IndexBatches) + ("committed_batches", CommittedBatches) + ("schema_columns", SchemaColumns) + ("filter_columns", FilterColumns) + ("additional_columns", AdditionalColumns) + ("compacted_portions_bytes", CompactedPortionsBytes) + ("inserted_portions_bytes", InsertedPortionsBytes) + ("committed_portions_bytes", CommittedPortionsBytes) + ("data_filter_bytes", DataFilterBytes) + ("data_additional_bytes", DataAdditionalBytes) + ("delta_bytes", CompactedPortionsBytes + InsertedPortionsBytes + CommittedPortionsBytes - DataFilterBytes - DataAdditionalBytes) + ("selected_rows", SelectedRows) + ; +} + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/reader/common/stats.h 
b/ydb/core/tx/columnshard/engines/reader/common/stats.h new file mode 100644 index 000000000000..3feb8459cee0 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/reader/common/stats.h @@ -0,0 +1,41 @@ +#pragma once +#include +#include + +namespace NKikimr::NOlap::NReader { + +struct TReadStats { + TInstant BeginTimestamp; + ui64 IndexGranules{0}; + ui64 IndexPortions{0}; + ui64 IndexBatches{0}; + ui64 CommittedBatches{0}; + ui64 CommittedPortionsBytes = 0; + ui64 InsertedPortionsBytes = 0; + ui64 CompactedPortionsBytes = 0; + ui64 DataFilterBytes{0}; + ui64 DataAdditionalBytes{0}; + + ui32 SchemaColumns = 0; + ui32 FilterColumns = 0; + ui32 AdditionalColumns = 0; + + ui32 SelectedRows = 0; + + TReadStats() + : BeginTimestamp(TInstant::Now()) { + } + + void PrintToLog(); + + ui64 GetReadBytes() const { // total bytes read: compacted, inserted and committed portions, each counted once + return CompactedPortionsBytes + InsertedPortionsBytes + CommittedPortionsBytes; + } + + TDuration Duration() { + return TInstant::Now() - BeginTimestamp; + } +}; + + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/reader/common/ya.make b/ydb/core/tx/columnshard/engines/reader/common/ya.make new file mode 100644 index 000000000000..8c7beb01bd69 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/reader/common/ya.make @@ -0,0 +1,16 @@ +LIBRARY() + +SRCS( + conveyor_task.cpp + queue.cpp + description.cpp + result.cpp + stats.cpp +) + +PEERDIR( + ydb/core/tx/program + ydb/core/formats/arrow/reader +) + +END() diff --git a/ydb/core/tx/columnshard/engines/reader/conveyor_task.cpp b/ydb/core/tx/columnshard/engines/reader/conveyor_task.cpp deleted file mode 100644 index 736dbcf5be71..000000000000 --- a/ydb/core/tx/columnshard/engines/reader/conveyor_task.cpp +++ /dev/null @@ -1,10 +0,0 @@ -#include "conveyor_task.h" -#include "read_context.h" - -namespace NKikimr::NColumnShard { - -bool IDataTasksProcessor::ITask::Apply(NOlap::IDataReader& indexedDataRead) const { - return DoApply(indexedDataRead); -} - -} diff --git
a/ydb/core/tx/columnshard/engines/reader/plain_reader/column_assembler.cpp b/ydb/core/tx/columnshard/engines/reader/plain_reader/column_assembler.cpp deleted file mode 100644 index 7ea68e1a3398..000000000000 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/column_assembler.cpp +++ /dev/null @@ -1,41 +0,0 @@ -#include "column_assembler.h" -#include "plain_read_data.h" - -namespace NKikimr::NOlap::NPlainReader { - -bool TAssembleBatch::DoExecute() { - /// @warning The replace logic is correct only in assumption that predicate is applied over a part of ReplaceKey. - /// It's not OK to apply predicate before replacing key duplicates otherwise. - /// Assumption: dup(A, B) <=> PK(A) = PK(B) => Predicate(A) = Predicate(B) => all or no dups for PK(A) here - - auto batchConstructor = BuildBatchConstructor(FetchColumns->GetFilteredSchemaVerified()); - - Y_ABORT_UNLESS(batchConstructor.GetColumnsCount()); - - TPortionInfo::TPreparedBatchData::TAssembleOptions options; - auto addBatch = batchConstructor.AssembleTable(options); - Y_ABORT_UNLESS(addBatch); - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN) - ("columns_count", addBatch->num_columns())("num_rows", addBatch->num_rows()); - Filter->Apply(addBatch); - Result = NArrow::ToBatch(addBatch, true); - - return true; -} - -bool TAssembleFFBatch::DoApply(IDataReader& /*owner*/) const { - Source->InitFetchStageData(Result); - return true; -} - -TAssembleBatch::TAssembleBatch(const std::shared_ptr& context, const std::shared_ptr& portionInfo, - const std::shared_ptr& source, const std::shared_ptr& columns, const THashMap& blobs, const std::shared_ptr& filter) - : TBase(context, portionInfo, source, std::move(blobs)) - , Filter(filter) - , TaskGuard(Context->GetCommonContext()->GetCounters().GetAssembleTasksGuard()) - , FetchColumns(columns) -{ - TBase::SetPriority(TBase::EPriority::High); -} - -} diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/column_assembler.h 
b/ydb/core/tx/columnshard/engines/reader/plain_reader/column_assembler.h deleted file mode 100644 index 9158620e0991..000000000000 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/column_assembler.h +++ /dev/null @@ -1,38 +0,0 @@ -#pragma once -#include "source.h" -#include -#include -#include -#include -#include -#include "filter_assembler.h" - -namespace NKikimr::NOlap::NPlainReader { -class TBatch; -class TAssembleBatch: public TAssemblerCommon, public NColumnShard::TMonitoringObjectsCounter { -private: - using TBase = TAssemblerCommon; - std::shared_ptr Filter; -protected: - std::shared_ptr Result; - const NColumnShard::TCounterGuard TaskGuard; - const std::shared_ptr FetchColumns; - virtual bool DoExecute() override; -public: - virtual TString GetTaskClassIdentifier() const override { - return "PlainReader::TAssembleBatch"; - } - - TAssembleBatch(const std::shared_ptr& context, const std::shared_ptr& portionInfo, - const std::shared_ptr& source, const std::shared_ptr& columns, const THashMap& blobs, const std::shared_ptr& filter); -}; - -class TAssembleFFBatch: public TAssembleBatch { -private: - using TBase = TAssembleBatch; -protected: - virtual bool DoApply(IDataReader& owner) const override; -public: - using TBase::TBase; -}; -} diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/constructor.cpp b/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/constructor.cpp new file mode 100644 index 000000000000..87315949329a --- /dev/null +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/constructor.cpp @@ -0,0 +1,42 @@ +#include "constructor.h" +#include "resolver.h" +#include "read_metadata.h" +#include + +namespace NKikimr::NOlap::NReader::NPlain { + +NKikimr::TConclusionStatus TIndexScannerConstructor::ParseProgram(const TVersionedIndex* vIndex, const NKikimrTxDataShard::TEvKqpScan& proto, TReadDescription& read) const { + AFL_VERIFY(vIndex); + auto& indexInfo = 
vIndex->GetSchema(Snapshot)->GetIndexInfo(); + TIndexColumnResolver columnResolver(indexInfo); + return TBase::ParseProgram(vIndex, proto.GetOlapProgramType(), proto.GetOlapProgram(), read, columnResolver); +} + +std::vector TIndexScannerConstructor::GetPrimaryKeyScheme(const NColumnShard::TColumnShard* self) const { + auto& indexInfo = self->TablesManager.GetIndexInfo(Snapshot); + return indexInfo.GetPrimaryKeyColumns(); +} + +NKikimr::TConclusion> TIndexScannerConstructor::DoBuildReadMetadata(const NColumnShard::TColumnShard* self, const TReadDescription& read) const { + auto& insertTable = self->InsertTable; + auto& index = self->TablesManager.GetPrimaryIndex(); + if (!insertTable || !index) { + return std::shared_ptr(); + } + + if (read.GetSnapshot().GetPlanStep() < self->GetMinReadStep()) { + return TConclusionStatus::Fail(TStringBuilder() << "Snapshot too old: " << read.GetSnapshot()); + } + + TDataStorageAccessor dataAccessor(insertTable, index); + auto readMetadata = std::make_shared(index->CopyVersionedIndexPtr(), read.GetSnapshot(), + IsReverse ? 
TReadMetadataBase::ESorting::DESC : TReadMetadataBase::ESorting::ASC, read.GetProgram()); + + auto initResult = readMetadata->Init(read, dataAccessor); + if (!initResult) { + return initResult; + } + return dynamic_pointer_cast(readMetadata); +} + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/constructor.h b/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/constructor.h new file mode 100644 index 000000000000..bb576fdbdc70 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/constructor.h @@ -0,0 +1,17 @@ +#pragma once +#include + +namespace NKikimr::NOlap::NReader::NPlain { + +class TIndexScannerConstructor: public IScannerConstructor { +private: + using TBase = IScannerConstructor; +protected: + virtual TConclusion> DoBuildReadMetadata(const NColumnShard::TColumnShard* self, const TReadDescription& read) const override; +public: + using TBase::TBase; + virtual TConclusionStatus ParseProgram(const TVersionedIndex* vIndex, const NKikimrTxDataShard::TEvKqpScan& proto, TReadDescription& read) const override; + virtual std::vector GetPrimaryKeyScheme(const NColumnShard::TColumnShard* self) const override; +}; + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/read_metadata.cpp b/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/read_metadata.cpp new file mode 100644 index 000000000000..a664f71756c7 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/read_metadata.cpp @@ -0,0 +1,57 @@ +#include "read_metadata.h" +#include +#include + +namespace NKikimr::NOlap::NReader::NPlain { + +std::unique_ptr TReadMetadata::StartScan(const std::shared_ptr& readContext) const { + return std::make_unique(readContext, readContext->GetReadMetadataPtrVerifiedAs()); +} + +TConclusionStatus TReadMetadata::Init(const TReadDescription& readDescription, const TDataStorageAccessor& 
dataAccessor) { + SetPKRangesFilter(readDescription.PKRangesFilter); + + /// @note We could have column name changes between schema versions: + /// Add '1:foo', Drop '1:foo', Add '2:foo'. Drop should hide '1:foo' from reads. + /// It's expected that we have only one version on 'foo' in blob and could split them by schema {planStep:txId}. + /// So '1:foo' would be omitted in blob records for the column in new snapshots. And '2:foo' - in old ones. + /// It's not possible for blobs with several columns. There should be a special logic for them. + CommittedBlobs = dataAccessor.GetCommitedBlobs(readDescription, ResultIndexSchema->GetIndexInfo().GetReplaceKey()); + + SelectInfo = dataAccessor.Select(readDescription); + StatsMode = readDescription.StatsMode; + return TConclusionStatus::Success(); +} + +std::set TReadMetadata::GetEarlyFilterColumnIds() const { + auto& indexInfo = ResultIndexSchema->GetIndexInfo(); + std::set result; + for (auto&& i : GetProgram().GetEarlyFilterColumns()) { + auto id = indexInfo.GetColumnIdOptional(i); + if (id) { + result.emplace(*id); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("early_filter_column", i); + } + } + return result; +} + +std::set TReadMetadata::GetPKColumnIds() const { + std::set result; + auto& indexInfo = ResultIndexSchema->GetIndexInfo(); + for (auto&& i : indexInfo.GetPrimaryKeyColumns()) { + Y_ABORT_UNLESS(result.emplace(indexInfo.GetColumnId(i.first)).second); + } + return result; +} + +std::shared_ptr TReadMetadata::BuildReader(const std::shared_ptr& context) const { + return std::make_shared(context); +} + +NArrow::NMerger::TSortableBatchPosition TReadMetadata::BuildSortedPosition(const NArrow::TReplaceKey& key) const { + return NArrow::NMerger::TSortableBatchPosition(key.ToBatch(GetReplaceKey()), 0, + GetReplaceKey()->field_names(), {}, IsDescSorted()); +} + +} diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/read_metadata.h 
b/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/read_metadata.h new file mode 100644 index 000000000000..8be1cf19a97c --- /dev/null +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/read_metadata.h @@ -0,0 +1,87 @@ +#pragma once +#include +#include +#include +#include +#include + +namespace NKikimr::NOlap::NReader::NPlain { + +// Holds all metadata that is needed to perform read/scan +struct TReadMetadata : public TReadMetadataBase { + using TBase = TReadMetadataBase; +public: + using TConstPtr = std::shared_ptr; + + NArrow::NMerger::TSortableBatchPosition BuildSortedPosition(const NArrow::TReplaceKey& key) const; + std::shared_ptr BuildReader(const std::shared_ptr& context) const; + + bool HasProcessingColumnIds() const { + return GetProgram().HasProcessingColumnIds(); + } + + std::shared_ptr SelectInfo; + NYql::NDqProto::EDqStatsMode StatsMode = NYql::NDqProto::EDqStatsMode::DQ_STATS_MODE_NONE; + std::vector CommittedBlobs; + std::shared_ptr ReadStats; + + TReadMetadata(const std::shared_ptr info, const TSnapshot& snapshot, const ESorting sorting, const TProgramContainer& ssaProgram) + : TBase(info, sorting, ssaProgram, info->GetSchema(snapshot), snapshot) + , ReadStats(std::make_shared()) + { + } + + virtual std::vector GetKeyYqlSchema() const override { + return GetResultSchema()->GetIndexInfo().GetPrimaryKeyColumns(); + } + + TConclusionStatus Init(const TReadDescription& readDescription, const TDataStorageAccessor& dataAccessor); + + std::vector GetColumnsOrder() const { + auto schema = GetResultSchema(); + std::vector result; + for (auto&& i : schema->GetSchema()->fields()) { + result.emplace_back(i->name()); + } + return result; + } + + std::set GetEarlyFilterColumnIds() const; + std::set GetPKColumnIds() const; + + bool Empty() const { + Y_ABORT_UNLESS(SelectInfo); + return SelectInfo->PortionsOrderedPK.empty() && CommittedBlobs.empty(); + } + + size_t NumIndexedChunks() const { + Y_ABORT_UNLESS(SelectInfo); + return 
SelectInfo->NumChunks(); + } + + size_t NumIndexedBlobs() const { + Y_ABORT_UNLESS(SelectInfo); + return SelectInfo->Stats().Blobs; + } + + std::unique_ptr StartScan(const std::shared_ptr& readContext) const override; + + void Dump(IOutputStream& out) const override { + out << " index chunks: " << NumIndexedChunks() + << " index blobs: " << NumIndexedBlobs() + << " committed blobs: " << CommittedBlobs.size() + // << " with program steps: " << (Program ? Program->Steps.size() : 0) + << " at snapshot: " << GetRequestSnapshot().DebugString(); + TBase::Dump(out); + if (SelectInfo) { + out << ", " << *SelectInfo; + } + } + + friend IOutputStream& operator << (IOutputStream& out, const TReadMetadata& meta) { + meta.Dump(out); + return out; + } +}; + +} diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/resolver.cpp b/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/resolver.cpp new file mode 100644 index 000000000000..2b90c5f2faa4 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/resolver.cpp @@ -0,0 +1,5 @@ +#include "resolver.h" + +namespace NKikimr::NOlap::NReader::NPlain { + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/resolver.h b/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/resolver.h new file mode 100644 index 000000000000..c5a2998a54c1 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/resolver.h @@ -0,0 +1,32 @@ +#pragma once +#include +#include + +namespace NKikimr::NOlap::NReader::NPlain { + +class TIndexColumnResolver: public IColumnResolver { + const NOlap::TIndexInfo& IndexInfo; + +public: + explicit TIndexColumnResolver(const NOlap::TIndexInfo& indexInfo) + : IndexInfo(indexInfo) { + } + + virtual std::optional GetColumnIdOptional(const TString& name) const override { + return IndexInfo.GetColumnIdOptional(name); + } + + TString GetColumnName(ui32 id, bool required) 
const override { + return IndexInfo.GetColumnName(id, required); + } + + const NTable::TScheme::TTableSchema& GetSchema() const override { + return IndexInfo; + } + + NSsa::TColumnInfo GetDefaultColumn() const override { + return NSsa::TColumnInfo::Original((ui32)NOlap::TIndexInfo::ESpecialColumn::PLAN_STEP, NOlap::TIndexInfo::SPEC_COL_PLAN_STEP); + } +}; + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/ya.make b/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/ya.make new file mode 100644 index 000000000000..b91efa4346d8 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor/ya.make @@ -0,0 +1,13 @@ +LIBRARY() + +SRCS( + constructor.cpp + resolver.cpp + read_metadata.cpp +) + +PEERDIR( + ydb/core/tx/columnshard/engines/reader/abstract +) + +END() diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/context.cpp b/ydb/core/tx/columnshard/engines/reader/plain_reader/context.cpp deleted file mode 100644 index 930e91ad544d..000000000000 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/context.cpp +++ /dev/null @@ -1,177 +0,0 @@ -#include "context.h" -#include "source.h" - -namespace NKikimr::NOlap::NPlainReader { - -std::shared_ptr TSpecialReadContext::BuildMerger() const { - return std::make_shared(ReadMetadata->GetReplaceKey(), ProgramInputColumns->GetSchema(), CommonContext->IsReverse()); -} - -ui64 TSpecialReadContext::GetMemoryForSources(const std::map>& sources, const bool isExclusive) { - ui64 result = 0; - for (auto&& i : sources) { - auto fetchingPlan = GetColumnsFetchingPlan(i.second, isExclusive); - AFL_VERIFY(i.second->GetIntervalsCount()); - result += fetchingPlan->PredictRawBytes(i.second) / i.second->GetIntervalsCount(); - } - return result; -} - -std::shared_ptr TSpecialReadContext::GetColumnsFetchingPlan(const std::shared_ptr& source, const bool exclusiveSource) const { - const bool needSnapshots = !exclusiveSource || 
ReadMetadata->GetSnapshot() < source->GetRecordSnapshotMax(); - const bool partialUsageByPK = ReadMetadata->GetPKRangesFilter().IsPortionInPartialUsage(source->GetStartReplaceKey(), source->GetFinishReplaceKey(), ReadMetadata->GetIndexInfo()); - auto result = CacheFetchingScripts[needSnapshots ? 1 : 0][exclusiveSource ? 1 : 0][partialUsageByPK ? 1 : 0]; - if (!result) { - return std::make_shared(source->GetRecordsCount(), "fake"); - } - return result; -} - -std::shared_ptr TSpecialReadContext::BuildColumnsFetchingPlan(const bool needSnapshots, const bool exclusiveSource, const bool partialUsageByPredicateExt) const { - std::shared_ptr result = std::make_shared(); - std::shared_ptr current = result; - const bool partialUsageByPredicate = partialUsageByPredicateExt && PredicateColumns->GetColumnsCount(); - if (!!IndexChecker) { - current = current->AttachNext(std::make_shared(std::make_shared(IndexChecker->GetIndexIds()))); - current = current->AttachNext(std::make_shared(IndexChecker)); - } - if (!EFColumns->GetColumnsCount() && !partialUsageByPredicate) { - TColumnsSet columnsFetch = *FFColumns; - if (needSnapshots) { - columnsFetch = columnsFetch + *SpecColumns; - } - if (!exclusiveSource) { - columnsFetch = columnsFetch + *PKColumns + *SpecColumns; - } - if (columnsFetch.GetColumnsCount()) { - current = current->AttachNext(std::make_shared(std::make_shared(columnsFetch), "simple")); - current = current->AttachNext(std::make_shared(std::make_shared(columnsFetch))); - } else { - return nullptr; - } - } else if (exclusiveSource) { - TColumnsSet columnsFetch = *EFColumns; - if (needSnapshots || FFColumns->Cross(*SpecColumns)) { - columnsFetch = columnsFetch + *SpecColumns; - } - if (partialUsageByPredicate) { - columnsFetch = columnsFetch + *PredicateColumns; - } - AFL_VERIFY(columnsFetch.GetColumnsCount()); - current = current->AttachNext(std::make_shared(std::make_shared(columnsFetch), "ef")); - - if (needSnapshots || FFColumns->Cross(*SpecColumns)) { - current = 
current->AttachNext(std::make_shared(SpecColumns)); - current = current->AttachNext(std::make_shared()); - columnsFetch = columnsFetch - *SpecColumns; - } - if (partialUsageByPredicate) { - current = current->AttachNext(std::make_shared(PredicateColumns)); - current = current->AttachNext(std::make_shared()); - columnsFetch = columnsFetch - *PredicateColumns; - } - if (columnsFetch.GetColumnsCount()) { - current = current->AttachNext(std::make_shared(std::make_shared(columnsFetch))); - } - for (auto&& i : ReadMetadata->GetProgram().GetSteps()) { - if (!i->IsFilterOnly()) { - break; - } - current = current->AttachNext(std::make_shared(i)); - } - TColumnsSet columnsAdditionalFetch = *FFColumns - *EFColumns - *SpecColumns; - if (partialUsageByPredicate) { - columnsAdditionalFetch = columnsAdditionalFetch - *PredicateColumns; - } - if (columnsAdditionalFetch.GetColumnsCount()) { - current = current->AttachNext(std::make_shared(std::make_shared(columnsAdditionalFetch))); - current = current->AttachNext(std::make_shared(std::make_shared(columnsAdditionalFetch))); - } - } else { - TColumnsSet columnsFetch = *MergeColumns + *EFColumns; - AFL_VERIFY(columnsFetch.GetColumnsCount()); - current = current->AttachNext(std::make_shared(std::make_shared(columnsFetch), "full")); - current = current->AttachNext(std::make_shared(SpecColumns)); - if (needSnapshots) { - current = current->AttachNext(std::make_shared()); - } - current = current->AttachNext(std::make_shared(PKColumns)); - if (partialUsageByPredicate) { - current = current->AttachNext(std::make_shared()); - } - const TColumnsSet columnsFetchEF = columnsFetch - *SpecColumns - *PKColumns; - current = current->AttachNext(std::make_shared(std::make_shared(columnsFetchEF))); - for (auto&& i : ReadMetadata->GetProgram().GetSteps()) { - if (!i->IsFilterOnly()) { - break; - } - current = current->AttachNext(std::make_shared(i)); - } - const TColumnsSet columnsAdditionalFetch = *FFColumns - *EFColumns - *SpecColumns - *PKColumns - 
*PredicateColumns; - if (columnsAdditionalFetch.GetColumnsCount()) { - current = current->AttachNext(std::make_shared(std::make_shared(columnsAdditionalFetch))); - current = current->AttachNext(std::make_shared(std::make_shared(columnsAdditionalFetch))); - } - } - return result->GetNextStep(); -} - -TSpecialReadContext::TSpecialReadContext(const std::shared_ptr& commonContext) - : CommonContext(commonContext) -{ - ReadMetadata = dynamic_pointer_cast(CommonContext->GetReadMetadata()); - Y_ABORT_UNLESS(ReadMetadata); - Y_ABORT_UNLESS(ReadMetadata->SelectInfo); - - auto readSchema = ReadMetadata->GetLoadSchema(ReadMetadata->GetSnapshot()); - SpecColumns = std::make_shared(TIndexInfo::GetSpecialColumnIdsSet(), ReadMetadata->GetIndexInfo(), readSchema); - IndexChecker = ReadMetadata->GetProgram().GetIndexChecker(); - { - auto predicateColumns = ReadMetadata->GetPKRangesFilter().GetColumnIds(ReadMetadata->GetIndexInfo()); - if (predicateColumns.size()) { - PredicateColumns = std::make_shared(predicateColumns, ReadMetadata->GetIndexInfo(), readSchema); - } else { - PredicateColumns = std::make_shared(); - } - } - { - auto efColumns = ReadMetadata->GetEarlyFilterColumnIds(); - if (efColumns.size()) { - EFColumns = std::make_shared(efColumns, ReadMetadata->GetIndexInfo(), readSchema); - } else { - EFColumns = std::make_shared(); - } - } - if (ReadMetadata->HasProcessingColumnIds()) { - FFColumns = std::make_shared(ReadMetadata->GetProcessingColumnIds(), ReadMetadata->GetIndexInfo(), readSchema); - if (SpecColumns->Contains(*FFColumns) && !EFColumns->IsEmpty()) { - FFColumns = std::make_shared(*EFColumns + *SpecColumns); - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("ff_modified", FFColumns->DebugString()); - } else { - AFL_VERIFY(!FFColumns->Contains(*SpecColumns))("info", FFColumns->DebugString()); - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("ff_first", FFColumns->DebugString()); - } - } else { - FFColumns = EFColumns; - } - if (FFColumns->IsEmpty()) { - 
ProgramInputColumns = SpecColumns; - } else { - ProgramInputColumns = FFColumns; - } - - PKColumns = std::make_shared(ReadMetadata->GetPKColumnIds(), ReadMetadata->GetIndexInfo(), readSchema); - MergeColumns = std::make_shared(*PKColumns + *SpecColumns); - - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("columns_context_info", DebugString()); - CacheFetchingScripts[0][0][0] = BuildColumnsFetchingPlan(false, false, false); - CacheFetchingScripts[0][1][0] = BuildColumnsFetchingPlan(false, true, false); - CacheFetchingScripts[1][0][0] = BuildColumnsFetchingPlan(true, false, false); - CacheFetchingScripts[1][1][0] = BuildColumnsFetchingPlan(true, true, false); - CacheFetchingScripts[0][0][1] = BuildColumnsFetchingPlan(false, false, true); - CacheFetchingScripts[0][1][1] = BuildColumnsFetchingPlan(false, true, true); - CacheFetchingScripts[1][0][1] = BuildColumnsFetchingPlan(true, false, true); - CacheFetchingScripts[1][1][1] = BuildColumnsFetchingPlan(true, true, true); -} - -} diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/context.h b/ydb/core/tx/columnshard/engines/reader/plain_reader/context.h deleted file mode 100644 index ff6f17216642..000000000000 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/context.h +++ /dev/null @@ -1,51 +0,0 @@ -#pragma once -#include "columns_set.h" -#include "fetching.h" -#include -#include - -namespace NKikimr::NOlap::NPlainReader { - -class IDataSource; - -class TSpecialReadContext { -private: - YDB_READONLY_DEF(std::shared_ptr, CommonContext); - - YDB_READONLY_DEF(std::shared_ptr, SpecColumns); - YDB_READONLY_DEF(std::shared_ptr, MergeColumns); - YDB_READONLY_DEF(std::shared_ptr, EFColumns); - YDB_READONLY_DEF(std::shared_ptr, PredicateColumns); - YDB_READONLY_DEF(std::shared_ptr, PKColumns); - YDB_READONLY_DEF(std::shared_ptr, FFColumns); - YDB_READONLY_DEF(std::shared_ptr, ProgramInputColumns); - - NIndexes::TIndexCheckerContainer IndexChecker; - TReadMetadata::TConstPtr ReadMetadata; - std::shared_ptr 
EmptyColumns = std::make_shared(); - std::shared_ptr BuildColumnsFetchingPlan(const bool needSnapshotsFilter, const bool exclusiveSource, const bool partialUsageByPredicate) const; - std::array, 2>, 2>, 2> CacheFetchingScripts; -public: - ui64 GetMemoryForSources(const std::map>& sources, const bool isExclusive); - - const TReadMetadata::TConstPtr& GetReadMetadata() const { - return ReadMetadata; - } - - std::shared_ptr BuildMerger() const; - - TString DebugString() const { - return TStringBuilder() << - "ef=" << EFColumns->DebugString() << ";" << - "pk=" << PKColumns->DebugString() << ";" << - "ff=" << FFColumns->DebugString() << ";" << - "program_input=" << ProgramInputColumns->DebugString() - ; - } - - TSpecialReadContext(const std::shared_ptr& commonContext); - - std::shared_ptr GetColumnsFetchingPlan(const std::shared_ptr& source, const bool exclusiveSource) const; -}; - -} diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/fetching.cpp b/ydb/core/tx/columnshard/engines/reader/plain_reader/fetching.cpp deleted file mode 100644 index 7f38c7dfc000..000000000000 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/fetching.cpp +++ /dev/null @@ -1,98 +0,0 @@ -#include "fetching.h" -#include "source.h" -#include -#include - -#include - -namespace NKikimr::NOlap::NPlainReader { - -bool TStepAction::DoApply(IDataReader& /*owner*/) const { - if (FinishedFlag) { - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "apply"); - Source->SetIsReady(); - } - return true; -} - -bool TStepAction::DoExecute() { - NMiniKQL::TThrowingBindTerminator bind; - while (Step) { - if (Source->IsEmptyData()) { - Source->Finalize(); - FinishedFlag = true; - return true; - } - if (!Step->ExecuteInplace(Source, Step)) { - return true; - } - if (Source->IsEmptyData()) { - Source->Finalize(); - FinishedFlag = true; - return true; - } - Step = Step->GetNextStep(); - } - Source->Finalize(); - FinishedFlag = true; - return true; -} - -bool 
TBlobsFetchingStep::DoExecuteInplace(const std::shared_ptr& source, const std::shared_ptr& step) const { - AFL_VERIFY((!!Columns) ^ (!!Indexes)); - - const bool startFetchingColumns = Columns ? source->StartFetchingColumns(source, step, Columns) : false; - const bool startFetchingIndexes = Indexes ? source->StartFetchingIndexes(source, step, Indexes) : false; - return !startFetchingColumns && !startFetchingIndexes; -} - -ui64 TBlobsFetchingStep::PredictRawBytes(const std::shared_ptr& source) const { - if (Columns) { - return source->GetRawBytes(Columns->GetColumnIds()); - } else { - AFL_VERIFY(Indexes); - return source->GetIndexBytes(Indexes->GetIndexIdsSet()); - } -} - -bool TAssemblerStep::DoExecuteInplace(const std::shared_ptr& source, const std::shared_ptr& /*step*/) const { - source->AssembleColumns(Columns); - return true; -} - -bool TFilterProgramStep::DoExecuteInplace(const std::shared_ptr& source, const std::shared_ptr& /*step*/) const { - AFL_VERIFY(source); - AFL_VERIFY(Step); - AFL_VERIFY(source->GetStageData().GetTable()); - auto filter = Step->BuildFilter(source->GetStageData().GetTable()); - source->MutableStageData().AddFilter(filter); - return true; -} - -bool TPredicateFilter::DoExecuteInplace(const std::shared_ptr& source, const std::shared_ptr& /*step*/) const { - auto filter = source->GetContext()->GetReadMetadata()->GetPKRangesFilter().BuildFilter(source->GetStageData().GetTable()); - source->MutableStageData().AddFilter(filter); - return true; -} - -bool TSnapshotFilter::DoExecuteInplace(const std::shared_ptr& source, const std::shared_ptr& /*step*/) const { - auto filter = MakeSnapshotFilter(source->GetStageData().GetTable(), source->GetContext()->GetReadMetadata()->GetSnapshot()); - source->MutableStageData().AddFilter(filter); - return true; -} - -bool TBuildFakeSpec::DoExecuteInplace(const std::shared_ptr& source, const std::shared_ptr& /*step*/) const { - std::vector> columns; - for (auto&& f : 
TIndexInfo::ArrowSchemaSnapshot()->fields()) { - columns.emplace_back(NArrow::TThreadSimpleArraysCache::GetConst(f->type(), std::make_shared(0), Count)); - } - source->MutableStageData().AddBatch(arrow::RecordBatch::Make(TIndexInfo::ArrowSchemaSnapshot(), Count, columns)); - return true; -} - -bool TApplyIndexStep::DoExecuteInplace(const std::shared_ptr& source, const std::shared_ptr& /*step*/) const { - source->ApplyIndex(IndexChecker); - return true; -} - -} diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/fetching.h b/ydb/core/tx/columnshard/engines/reader/plain_reader/fetching.h deleted file mode 100644 index 6446538535da..000000000000 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/fetching.h +++ /dev/null @@ -1,220 +0,0 @@ -#pragma once -#include "columns_set.h" -#include -#include -#include -#include -#include - -namespace NKikimr::NOlap::NPlainReader { -class IDataSource; - -class IFetchingStep { -private: - std::shared_ptr NextStep; - YDB_READONLY_DEF(TString, Name); - YDB_READONLY(ui32, Index, 0); - YDB_READONLY_DEF(TString, BranchName); -protected: - virtual bool DoExecuteInplace(const std::shared_ptr& source, const std::shared_ptr& step) const = 0; - virtual TString DoDebugString() const { - return ""; - } -public: - virtual ~IFetchingStep() = default; - - std::shared_ptr AttachNext(const std::shared_ptr& nextStep) { - AFL_VERIFY(nextStep); - NextStep = nextStep; - nextStep->Index = Index + 1; - if (!nextStep->BranchName) { - nextStep->BranchName = BranchName; - } - return nextStep; - } - - virtual ui64 PredictRawBytes(const std::shared_ptr& /*source*/) const { - return 0; - } - - bool ExecuteInplace(const std::shared_ptr& source, const std::shared_ptr& step) const { - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("scan_step", DebugString())("scan_step_idx", GetIndex()); - return DoExecuteInplace(source, step); - } - - const std::shared_ptr& GetNextStep() const { - return NextStep; - } - - IFetchingStep(const TString& name, 
const TString& branchName = Default()) - : Name(name) - , BranchName(branchName) - { - - } - - TString DebugString() const { - TStringBuilder sb; - sb << "name=" << Name << ";" << DoDebugString() << ";branch=" << BranchName << ";"; - if (NextStep) { - sb << "next=" << NextStep->DebugString() << ";"; - } - return sb; - } -}; - -class TStepAction: public NColumnShard::IDataTasksProcessor::ITask { -private: - using TBase = NColumnShard::IDataTasksProcessor::ITask; - std::shared_ptr Source; - std::shared_ptr Step; - bool FinishedFlag = false; -protected: - virtual bool DoApply(IDataReader& /*owner*/) const override; - virtual bool DoExecute() override; -public: - virtual TString GetTaskClassIdentifier() const override { - return "STEP_ACTION"; - } - - TStepAction(const std::shared_ptr& source, const std::shared_ptr& step, const NActors::TActorId& ownerActorId) - : TBase(ownerActorId) - , Source(source) - , Step(step) - { - - } -}; - -class TBuildFakeSpec: public IFetchingStep { -private: - using TBase = IFetchingStep; - const ui32 Count = 0; -protected: - virtual bool DoExecuteInplace(const std::shared_ptr& source, const std::shared_ptr& /*step*/) const override; -public: - TBuildFakeSpec(const ui32 count, const TString& nameBranch = "") - : TBase("FAKE_SPEC", nameBranch) - , Count(count) - { - AFL_VERIFY(Count); - } -}; - -class TFakeStep: public IFetchingStep { -private: - using TBase = IFetchingStep; -public: - virtual bool DoExecuteInplace(const std::shared_ptr& /*source*/, const std::shared_ptr& /*step*/) const override { - return true; - } - - TFakeStep() - : TBase("FAKE") - { - - } -}; - -class TApplyIndexStep: public IFetchingStep { -private: - using TBase = IFetchingStep; - const NIndexes::TIndexCheckerContainer IndexChecker; -protected: - virtual bool DoExecuteInplace(const std::shared_ptr& source, const std::shared_ptr& /*step*/) const override; -public: - TApplyIndexStep(const NIndexes::TIndexCheckerContainer& indexChecker) - : TBase("APPLY_INDEX") - , 
IndexChecker(indexChecker) - { - - } -}; - -class TBlobsFetchingStep: public IFetchingStep { -private: - using TBase = IFetchingStep; - std::shared_ptr Columns; - std::shared_ptr Indexes; -protected: - virtual bool DoExecuteInplace(const std::shared_ptr& source, const std::shared_ptr& step) const override; - virtual ui64 PredictRawBytes(const std::shared_ptr& source) const override; - virtual TString DoDebugString() const override { - TStringBuilder sb; - if (Columns) { - sb << "columns=" << Columns->DebugString() << ";"; - } else { - sb << "indexes=" << Indexes->DebugString() << ";"; - } - return sb; - } -public: - TBlobsFetchingStep(const std::shared_ptr& columns, const TString& nameBranch = "") - : TBase("FETCHING", nameBranch) - , Columns(columns) { - AFL_VERIFY(Columns); - AFL_VERIFY(Columns->GetColumnsCount()); - } - - TBlobsFetchingStep(const std::shared_ptr& indexes, const TString& nameBranch = "") - : TBase("FETCHING", nameBranch) - , Indexes(indexes) { - AFL_VERIFY(Indexes); - AFL_VERIFY(Indexes->GetIndexesCount()); - } -}; - -class TAssemblerStep: public IFetchingStep { -private: - using TBase = IFetchingStep; - YDB_READONLY_DEF(std::shared_ptr, Columns); - virtual TString DoDebugString() const override { - return TStringBuilder() << "columns=" << Columns->DebugString() << ";"; - } -public: - virtual bool DoExecuteInplace(const std::shared_ptr& source, const std::shared_ptr& /*step*/) const override; - TAssemblerStep(const std::shared_ptr& columns) - : TBase("ASSEMBLER") - , Columns(columns) - { - AFL_VERIFY(Columns); - } -}; - -class TFilterProgramStep: public IFetchingStep { -private: - using TBase = IFetchingStep; - std::shared_ptr Step; -public: - virtual bool DoExecuteInplace(const std::shared_ptr& source, const std::shared_ptr& step) const override; - TFilterProgramStep(const std::shared_ptr& step) - : TBase("PROGRAM") - , Step(step) - { - - } -}; - -class TPredicateFilter: public IFetchingStep { -private: - using TBase = IFetchingStep; -public: - 
virtual bool DoExecuteInplace(const std::shared_ptr& source, const std::shared_ptr& step) const override; - TPredicateFilter() - : TBase("PREDICATE") { - - } -}; - -class TSnapshotFilter: public IFetchingStep { -private: - using TBase = IFetchingStep; -public: - virtual bool DoExecuteInplace(const std::shared_ptr& source, const std::shared_ptr& step) const override; - TSnapshotFilter() - : TBase("SNAPSHOT") { - - } -}; - -} diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/interval.cpp b/ydb/core/tx/columnshard/engines/reader/plain_reader/interval.cpp deleted file mode 100644 index dce2a8a3e1a3..000000000000 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/interval.cpp +++ /dev/null @@ -1,200 +0,0 @@ -#include "interval.h" -#include "scanner.h" -#include "plain_read_data.h" -#include - -namespace NKikimr::NOlap::NPlainReader { - -class TMergeTask: public NColumnShard::IDataTasksProcessor::ITask { -private: - using TBase = NColumnShard::IDataTasksProcessor::ITask; - std::shared_ptr ResultBatch; - std::shared_ptr LastPK; - const NColumnShard::TCounterGuard Guard; - std::shared_ptr Context; - std::map> Sources; - std::shared_ptr MergingContext; - const ui32 IntervalIdx; - std::optional ShardedBatch; - - void PrepareResultBatch() { - if (!ResultBatch || ResultBatch->num_rows() == 0) { - ResultBatch = nullptr; - LastPK = nullptr; - return; - } - { - ResultBatch = NArrow::ExtractColumns(ResultBatch, Context->GetProgramInputColumns()->GetColumnNamesVector()); - AFL_VERIFY(ResultBatch); - AFL_VERIFY((ui32)ResultBatch->num_columns() == Context->GetProgramInputColumns()->GetColumnNamesVector().size()); - NArrow::TStatusValidator::Validate(Context->GetReadMetadata()->GetProgram().ApplyProgram(ResultBatch)); - } - if (ResultBatch->num_rows()) { - const auto& shardingPolicy = Context->GetCommonContext()->GetComputeShardingPolicy(); - if (NArrow::THashConstructor::BuildHashUI64(ResultBatch, shardingPolicy.GetColumnNames(), "__compute_sharding_hash")) { - 
ShardedBatch = NArrow::TShardingSplitIndex::Apply(shardingPolicy.GetShardsCount(), ResultBatch, "__compute_sharding_hash"); - } else { - ShardedBatch = NArrow::TShardedRecordBatch(ResultBatch); - } - AFL_VERIFY(!!LastPK == !!ShardedBatch->GetRecordsCount())("lpk", !!LastPK)("sb", ShardedBatch->GetRecordsCount()); - } else { - ResultBatch = nullptr; - LastPK = nullptr; - } - } - - bool EmptyFiltersOnly() const { - for (auto&& [_, i] : Sources) { - if (!i->IsEmptyData()) { - return false; - } - } - return true; - } -protected: - virtual bool DoApply(NOlap::IDataReader& indexedDataRead) const override { - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "DoApply")("interval_idx", MergingContext->GetIntervalIdx()); - auto& reader = static_cast(indexedDataRead); - reader.MutableScanner().OnIntervalResult(ShardedBatch, LastPK, IntervalIdx, reader); - return true; - } - virtual bool DoExecute() override { - if (MergingContext->IsExclusiveInterval()) { - ResultBatch = Sources.begin()->second->GetStageResult().GetBatch(); - if (ResultBatch && ResultBatch->num_rows()) { - LastPK = Sources.begin()->second->GetLastPK(); - ResultBatch = NArrow::ExtractColumnsValidate(ResultBatch, Context->GetProgramInputColumns()->GetColumnNamesVector()); - AFL_VERIFY(ResultBatch)("info", Context->GetProgramInputColumns()->GetSchema()->ToString()); - Context->GetCommonContext()->GetCounters().OnNoScanInterval(ResultBatch->num_rows()); - if (Context->GetCommonContext()->IsReverse()) { - ResultBatch = NArrow::ReverseRecords(ResultBatch); - } - PrepareResultBatch(); - } - Sources.clear(); - AFL_VERIFY(!!LastPK == (!!ResultBatch && ResultBatch->num_rows())); - return true; - } - if (EmptyFiltersOnly()) { - ResultBatch = NArrow::MakeEmptyBatch(Context->GetProgramInputColumns()->GetSchema()); - return true; - } - std::shared_ptr merger = Context->BuildMerger(); - for (auto&& [_, i] : Sources) { - if (auto rb = i->GetStageResult().GetBatch()) { - merger->AddSource(rb, 
i->GetStageResult().GetNotAppliedFilter()); - } - } - AFL_VERIFY(merger->GetSourcesCount() <= Sources.size()); - const ui32 originalSourcesCount = Sources.size(); - Sources.clear(); - - if (merger->GetSourcesCount() == 0) { - ResultBatch = nullptr; - return true; - } - - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "DoExecute")("interval_idx", MergingContext->GetIntervalIdx()); - merger->SkipToLowerBound(MergingContext->GetStart(), MergingContext->GetIncludeStart()); - std::optional lastResultPosition; - if (merger->GetSourcesCount() == 1) { - ResultBatch = merger->SingleSourceDrain(MergingContext->GetFinish(), MergingContext->GetIncludeFinish(), &lastResultPosition); - if (ResultBatch) { - Context->GetCommonContext()->GetCounters().OnLogScanInterval(ResultBatch->num_rows()); - AFL_VERIFY(ResultBatch->schema()->Equals(Context->GetProgramInputColumns()->GetSchema()))("res", ResultBatch->schema()->ToString())("ctx", Context->GetProgramInputColumns()->GetSchema()->ToString()); - } - if (MergingContext->GetIncludeFinish() && originalSourcesCount == 1) { - AFL_VERIFY(merger->IsEmpty())("merging_context_finish", MergingContext->GetFinish().DebugJson().GetStringRobust())("merger", merger->DebugString()); - } - } else { - auto rbBuilder = std::make_shared(Context->GetProgramInputColumns()->GetSchema()->fields()); - merger->DrainCurrentTo(*rbBuilder, MergingContext->GetFinish(), MergingContext->GetIncludeFinish(), &lastResultPosition); - Context->GetCommonContext()->GetCounters().OnLinearScanInterval(rbBuilder->GetRecordsCount()); - ResultBatch = rbBuilder->Finalize(); - } - if (lastResultPosition) { - LastPK = lastResultPosition->ExtractSortingPosition(); - } - AFL_VERIFY(!!LastPK == (!!ResultBatch && ResultBatch->num_rows())); - PrepareResultBatch(); - return true; - } -public: - virtual TString GetTaskClassIdentifier() const override { - return "CS::MERGE_RESULT"; - } - - TMergeTask(std::shared_ptr&& mergingContext, const std::shared_ptr& readContext, 
std::map>&& sources) - : TBase(readContext->GetCommonContext()->GetScanActorId()) - , Guard(readContext->GetCommonContext()->GetCounters().GetMergeTasksGuard()) - , Context(readContext) - , Sources(std::move(sources)) - , MergingContext(std::move(mergingContext)) - , IntervalIdx(MergingContext->GetIntervalIdx()) - { - for (auto&& s : Sources) { - AFL_VERIFY(s.second->IsDataReady()); - } - - } -}; - -void TFetchingInterval::ConstructResult() { - if (ReadySourcesCount.Val() != WaitSourcesCount || !ReadyGuards.Val()) { - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "skip_construct_result")("interval_idx", IntervalIdx); - return; - } else { - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "start_construct_result")("interval_idx", IntervalIdx); - } - if (AtomicCas(&ResultConstructionInProgress, 1, 0)) { - auto task = std::make_shared(std::move(MergingContext), Context, std::move(Sources)); - task->SetPriority(NConveyor::ITask::EPriority::High); - NConveyor::TScanServiceOperator::SendTaskToExecute(task); - } -} - -void TFetchingInterval::OnInitResourcesGuard(const std::shared_ptr& guard) { - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "allocated")("interval_idx", IntervalIdx); - AFL_VERIFY(guard); - AFL_VERIFY(!ResourcesGuard); - ResourcesGuard = guard; - AFL_VERIFY(ReadyGuards.Inc() <= 1); - ConstructResult(); -} - -void TFetchingInterval::OnSourceFetchStageReady(const ui32 /*sourceIdx*/) { - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "fetched")("interval_idx", IntervalIdx); - AFL_VERIFY(ReadySourcesCount.Inc() <= WaitSourcesCount); - ConstructResult(); -} - -TFetchingInterval::TFetchingInterval(const NIndexedReader::TSortableBatchPosition& start, const NIndexedReader::TSortableBatchPosition& finish, - const ui32 intervalIdx, const std::map>& sources, const std::shared_ptr& context, - const bool includeFinish, const bool includeStart, const bool isExclusiveInterval) - : TTaskBase(0, context->GetMemoryForSources(sources, 
isExclusiveInterval), "", context->GetCommonContext()->GetResourcesTaskContext()) - , MergingContext(std::make_shared(start, finish, intervalIdx, includeFinish, includeStart, isExclusiveInterval)) - , Context(context) - , TaskGuard(Context->GetCommonContext()->GetCounters().GetResourcesAllocationTasksGuard()) - , Sources(sources) - , IntervalIdx(intervalIdx) -{ - Y_ABORT_UNLESS(Sources.size()); - for (auto&& [_, i] : Sources) { - if (!i->IsDataReady()) { - ++WaitSourcesCount; - } - i->RegisterInterval(*this); - } -} - -void TFetchingInterval::DoOnAllocationSuccess(const std::shared_ptr& guard) { - AFL_VERIFY(guard); - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("interval_idx", IntervalIdx)("event", "resources_allocated") - ("resources", guard->DebugString())("start", MergingContext->GetIncludeStart())("finish", MergingContext->GetIncludeFinish())("sources", Sources.size()); - for (auto&& [_, i] : Sources) { - i->InitFetchingPlan(Context->GetColumnsFetchingPlan(i, MergingContext->IsExclusiveInterval()), i, MergingContext->IsExclusiveInterval()); - } - OnInitResourcesGuard(guard); -} - -} diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/interval.h b/ydb/core/tx/columnshard/engines/reader/plain_reader/interval.h deleted file mode 100644 index 50f7fe76a807..000000000000 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/interval.h +++ /dev/null @@ -1,116 +0,0 @@ -#pragma once -#include -#include -#include "source.h" - -namespace NKikimr::NOlap::NPlainReader { - -class TScanHead; - -class TMergingContext { -protected: - YDB_READONLY_DEF(NIndexedReader::TSortableBatchPosition, Start); - YDB_READONLY_DEF(NIndexedReader::TSortableBatchPosition, Finish); - YDB_READONLY(bool, IncludeFinish, true); - YDB_READONLY(bool, IncludeStart, false); - YDB_READONLY(ui32, IntervalIdx, 0); - bool IsExclusiveIntervalFlag = false; -public: - TMergingContext(const NIndexedReader::TSortableBatchPosition& start, const NIndexedReader::TSortableBatchPosition& finish, 
- const ui32 intervalIdx, const bool includeFinish, const bool includeStart, const bool isExclusiveInterval) - : Start(start) - , Finish(finish) - , IncludeFinish(includeFinish) - , IncludeStart(includeStart) - , IntervalIdx(intervalIdx) - , IsExclusiveIntervalFlag(isExclusiveInterval) - { - - } - - bool IsExclusiveInterval() const { - return IsExclusiveIntervalFlag; - } - - NJson::TJsonValue DebugJson() const { - NJson::TJsonValue result = NJson::JSON_MAP; - result.InsertValue("start", Start.DebugJson()); - result.InsertValue("idx", IntervalIdx); - result.InsertValue("finish", Finish.DebugJson()); - result.InsertValue("include_finish", IncludeFinish); - result.InsertValue("exclusive", IsExclusiveIntervalFlag); - return result; - } - -}; - -class TFetchingInterval: public TNonCopyable, public NResourceBroker::NSubscribe::ITask { -private: - using TTaskBase = NResourceBroker::NSubscribe::ITask; - std::shared_ptr MergingContext; - TAtomic ResultConstructionInProgress = 0; - std::shared_ptr Context; - NColumnShard::TCounterGuard TaskGuard; - std::map> Sources; - void ConstructResult(); - - IDataSource& GetSourceVerified(const ui32 idx) { - auto it = Sources.find(idx); - Y_ABORT_UNLESS(it != Sources.end()); - return *it->second; - } - - std::shared_ptr ExtractSourceVerified(const ui32 idx) { - auto it = Sources.find(idx); - Y_ABORT_UNLESS(it != Sources.end()); - auto result = it->second; - Sources.erase(it); - return result; - } - - std::shared_ptr ResourcesGuard; - const ui32 IntervalIdx; - TAtomicCounter ReadySourcesCount = 0; - TAtomicCounter ReadyGuards = 0; - ui32 WaitSourcesCount = 0; - void OnInitResourcesGuard(const std::shared_ptr& guard); -protected: - virtual void DoOnAllocationSuccess(const std::shared_ptr& guard) override; - -public: - ui32 GetIntervalIdx() const { - return IntervalIdx; - } - - const std::map>& GetSources() const { - return Sources; - } - - const std::shared_ptr& GetResourcesGuard() const { - return ResourcesGuard; - } - - void Abort() { - 
for (auto&& i : Sources) { - i.second->Abort(); - } - } - - NJson::TJsonValue DebugJson() const { - NJson::TJsonValue result = NJson::JSON_MAP; - result.InsertValue("merging_context", MergingContext ? MergingContext->DebugJson() : ""); - auto& jsonSources = result.InsertValue("sources", NJson::JSON_ARRAY); - for (auto&& [_, i] : Sources) { - jsonSources.AppendValue(i->DebugJson()); - } - return result; - } - - void OnSourceFetchStageReady(const ui32 sourceIdx); - - TFetchingInterval(const NIndexedReader::TSortableBatchPosition& start, const NIndexedReader::TSortableBatchPosition& finish, - const ui32 intervalIdx, const std::map>& sources, const std::shared_ptr& context, - const bool includeFinish, const bool includeStart, const bool isExclusiveInterval); -}; - -} diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/columns_set.cpp b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/columns_set.cpp similarity index 89% rename from ydb/core/tx/columnshard/engines/reader/plain_reader/columns_set.cpp rename to ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/columns_set.cpp index ee95b7e42fa1..f100c8f89041 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/columns_set.cpp +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/columns_set.cpp @@ -2,7 +2,7 @@ #include #include -namespace NKikimr::NOlap::NPlainReader { +namespace NKikimr::NOlap::NReader::NPlain { TString TColumnsSet::DebugString() const { return TStringBuilder() << "(" @@ -11,7 +11,7 @@ TString TColumnsSet::DebugString() const { << ");"; } -NKikimr::NOlap::NPlainReader::TColumnsSet TColumnsSet::operator-(const TColumnsSet& external) const { +NKikimr::NOlap::NReader::NPlain::TColumnsSet TColumnsSet::operator-(const TColumnsSet& external) const { if (external.IsEmpty() || IsEmpty()) { return *this; } @@ -30,7 +30,7 @@ NKikimr::NOlap::NPlainReader::TColumnsSet TColumnsSet::operator-(const TColumnsS return result; } 
-NKikimr::NOlap::NPlainReader::TColumnsSet TColumnsSet::operator+(const TColumnsSet& external) const { +NKikimr::NOlap::NReader::NPlain::TColumnsSet TColumnsSet::operator+(const TColumnsSet& external) const { if (external.IsEmpty()) { return *this; } diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/columns_set.h b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/columns_set.h similarity index 88% rename from ydb/core/tx/columnshard/engines/reader/plain_reader/columns_set.h rename to ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/columns_set.h index 0bca398204e2..08d7ac103d80 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/columns_set.h +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/columns_set.h @@ -4,7 +4,7 @@ #include #include -namespace NKikimr::NOlap::NPlainReader { +namespace NKikimr::NOlap::NReader::NPlain { class TIndexesSet { private: @@ -83,6 +83,11 @@ class TColumnsSet { return *FilteredSchema; } + const std::shared_ptr& GetFilteredSchemaPtrVerified() const { + AFL_VERIFY(FilteredSchema); + return FilteredSchema; + } + bool Contains(const std::shared_ptr& columnsSet) const { if (!columnsSet) { return true; @@ -115,6 +120,16 @@ class TColumnsSet { return false; } + std::set Intersect(const TColumnsSet& columnsSet) const { + std::set result; + for (auto&& i : columnsSet.ColumnIds) { + if (ColumnIds.contains(i)) { + result.emplace(i); + } + } + return result; + } + bool IsEqual(const TColumnsSet& columnsSet) const { if (columnsSet.GetColumnIds().size() != ColumnIds.size()) { return false; diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor.cpp b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/constructor.cpp similarity index 62% rename from ydb/core/tx/columnshard/engines/reader/plain_reader/constructor.cpp rename to ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/constructor.cpp index a4ecce5de374..55810391516d 100644 --- 
a/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor.cpp +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/constructor.cpp @@ -1,18 +1,18 @@ #include "constructor.h" #include -namespace NKikimr::NOlap::NPlainReader { +namespace NKikimr::NOlap::NReader::NPlain { void TBlobsFetcherTask::DoOnDataReady(const std::shared_ptr& /*resourcesGuard*/) { - Source->MutableStageData().AddBlobs(ExtractBlobsData()); - AFL_VERIFY(Step->GetNextStep()); - auto task = std::make_shared(Source, Step->GetNextStep(), Context->GetCommonContext()->GetScanActorId()); + Source->MutableStageData().AddBlobs(Source->DecodeBlobAddresses(ExtractBlobsData())); + AFL_VERIFY(Step.Next()); + auto task = std::make_shared(Source, std::move(Step), Context->GetCommonContext()->GetScanActorId()); NConveyor::TScanServiceOperator::SendTaskToExecute(task); } -bool TBlobsFetcherTask::DoOnError(const TBlobRange& range, const IBlobsReadingAction::TErrorStatus& status) { +bool TBlobsFetcherTask::DoOnError(const TString& storageId, const TBlobRange& range, const IBlobsReadingAction::TErrorStatus& status) { AFL_ERROR(NKikimrServices::TX_COLUMNSHARD_SCAN)("error_on_blob_reading", range.ToString())("scan_actor_id", Context->GetCommonContext()->GetScanActorId()) - ("status", status.GetErrorMessage())("status_code", status.GetStatus()); + ("status", status.GetErrorMessage())("status_code", status.GetStatus())("storage_id", storageId); NActors::TActorContext::AsActorContext().Send(Context->GetCommonContext()->GetScanActorId(), std::make_unique(TConclusionStatus::Fail("cannot read blob range " + range.ToString()))); return false; diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor.h b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/constructor.h similarity index 54% rename from ydb/core/tx/columnshard/engines/reader/plain_reader/constructor.h rename to ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/constructor.h index 
3b5ceca5200e..79e3e26c4e3c 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/constructor.h +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/constructor.h @@ -1,25 +1,25 @@ #pragma once -#include -#include +#include +#include #include #include #include #include "source.h" -namespace NKikimr::NOlap::NPlainReader { +namespace NKikimr::NOlap::NReader::NPlain { -class TBlobsFetcherTask: public NBlobOperations::NRead::ITask { +class TBlobsFetcherTask: public NBlobOperations::NRead::ITask, public NColumnShard::TMonitoringObjectsCounter { private: using TBase = NBlobOperations::NRead::ITask; const std::shared_ptr Source; - const std::shared_ptr Step; + TFetchingScriptCursor Step; const std::shared_ptr Context; virtual void DoOnDataReady(const std::shared_ptr& resourcesGuard) override; - virtual bool DoOnError(const TBlobRange& range, const IBlobsReadingAction::TErrorStatus& status) override; + virtual bool DoOnError(const TString& storageId, const TBlobRange& range, const IBlobsReadingAction::TErrorStatus& status) override; public: TBlobsFetcherTask(const std::vector>& readActions, const std::shared_ptr& sourcePtr, - const std::shared_ptr& step, const std::shared_ptr& context, const TString& taskCustomer, const TString& externalTaskId) + const TFetchingScriptCursor& step, const std::shared_ptr& context, const TString& taskCustomer, const TString& externalTaskId) : TBase(readActions, taskCustomer, externalTaskId) , Source(sourcePtr) , Step(step) diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/context.cpp b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/context.cpp new file mode 100644 index 000000000000..e6f7fe827f94 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/context.cpp @@ -0,0 +1,235 @@ +#include "context.h" +#include "source.h" + +namespace NKikimr::NOlap::NReader::NPlain { + +std::unique_ptr TSpecialReadContext::BuildMerger() const { + return 
std::make_unique(ReadMetadata->GetReplaceKey(), ProgramInputColumns->GetSchema(), CommonContext->IsReverse(), IIndexInfo::GetSpecialColumnNames()); +} + +ui64 TSpecialReadContext::GetMemoryForSources(const THashMap>& sources, const bool isExclusive) { + ui64 result = 0; + bool hasSequentialReadSources = false; + for (auto&& i : sources) { + auto fetchingPlan = GetColumnsFetchingPlan(i.second); + AFL_VERIFY(i.second->GetIntervalsCount()); + const ui64 sourceMemory = fetchingPlan->PredictRawBytes(i.second) / i.second->GetIntervalsCount(); + if (!i.second->IsSourceInMemory()) { + hasSequentialReadSources = true; + } + result += sourceMemory; + } + AFL_VERIFY(result); + if (hasSequentialReadSources) { + result += ReadSequentiallyBufferSize; + } else { + if (!isExclusive && !CommonContext->IsReverse()) { + result = 2 * result; // due to in time we will have data in original portion + data in merged(or reversed) interval + } + } + return result; +} + +std::shared_ptr TSpecialReadContext::GetColumnsFetchingPlan(const std::shared_ptr& source) const { + const bool needSnapshots = !source->GetExclusiveIntervalOnly() || ReadMetadata->GetRequestSnapshot() < source->GetRecordSnapshotMax() || !source->IsSourceInMemory(); + const bool partialUsageByPK = ReadMetadata->GetPKRangesFilter().IsPortionInPartialUsage(source->GetStartReplaceKey(), source->GetFinishReplaceKey(), ReadMetadata->GetIndexInfo()); + const bool useIndexes = (IndexChecker ? source->HasIndexes(IndexChecker->GetIndexIds()) : false); + if (auto result = CacheFetchingScripts[needSnapshots ? 1 : 0][(source->GetExclusiveIntervalOnly() && source->IsSourceInMemory()) ? 1 : 0][partialUsageByPK ? 1 : 0][useIndexes ? 
1 : 0]) { + return result; + } + { + std::shared_ptr result = std::make_shared(); + result->SetBranchName("FAKE"); + result->AddStep(std::make_shared(source->GetRecordsCount())); + return result; + } +} + +std::shared_ptr TSpecialReadContext::BuildColumnsFetchingPlan(const bool needSnapshots, const bool exclusiveSource, + const bool partialUsageByPredicateExt, const bool useIndexes) const { + std::shared_ptr result = std::make_shared(); + const bool partialUsageByPredicate = partialUsageByPredicateExt && PredicateColumns->GetColumnsCount(); + if (!!IndexChecker && useIndexes) { + result->AddStep(std::make_shared(std::make_shared(IndexChecker->GetIndexIds()))); + result->AddStep(std::make_shared(IndexChecker)); + } + if (!EFColumns->GetColumnsCount() && !partialUsageByPredicate) { + result->SetBranchName("simple"); + TColumnsSet columnsFetch = *FFColumns; + if (needSnapshots) { + columnsFetch = columnsFetch + *SpecColumns; + } + if (!exclusiveSource) { + columnsFetch = columnsFetch + *PKColumns + *SpecColumns; + } else { + if (columnsFetch.GetColumnsCount() == 1 && SpecColumns->Contains(columnsFetch)) { + return nullptr; + } + } + if (columnsFetch.GetColumnsCount()) { + result->AddStep(std::make_shared(std::make_shared(columnsFetch))); + if (!exclusiveSource) { + result->AddStep(std::make_shared(std::make_shared(*PKColumns + *SpecColumns), "LAST")); + auto additional = columnsFetch - (*PKColumns + *SpecColumns); + if (!additional.IsEmpty()) { + result->AddStep(std::make_shared(std::make_shared(columnsFetch - (*PKColumns + *SpecColumns)), "LAST")); + } + } else { + result->AddStep(std::make_shared(std::make_shared(columnsFetch), "LAST")); + } + } else { + return nullptr; + } + } else if (exclusiveSource) { + result->SetBranchName("exclusive"); + TColumnsSet columnsFetch = *EFColumns; + if (needSnapshots || FFColumns->Cross(*SpecColumns)) { + columnsFetch = columnsFetch + *SpecColumns; + } + if (partialUsageByPredicate) { + columnsFetch = columnsFetch + 
*PredicateColumns; + } + AFL_VERIFY(columnsFetch.GetColumnsCount()); + result->AddStep(std::make_shared(std::make_shared(columnsFetch))); + + if (needSnapshots || FFColumns->Cross(*SpecColumns)) { + result->AddStep(std::make_shared(SpecColumns, "SPEC")); + result->AddStep(std::make_shared()); + columnsFetch = columnsFetch - *SpecColumns; + } + if (partialUsageByPredicate) { + result->AddStep(std::make_shared(PredicateColumns, "PREDICATE")); + result->AddStep(std::make_shared()); + columnsFetch = columnsFetch - *PredicateColumns; + } + for (auto&& i : ReadMetadata->GetProgram().GetSteps()) { + if (!i->IsFilterOnly()) { + break; + } + TColumnsSet stepColumnIds(i->GetFilterOriginalColumnIds(), ReadMetadata->GetIndexInfo(), ReadMetadata->GetResultSchema()); + { + auto intersectionIds = columnsFetch.Intersect(stepColumnIds); + if (intersectionIds.size()) { + TColumnsSet intersection(intersectionIds, ReadMetadata->GetIndexInfo(), ReadMetadata->GetResultSchema()); + result->AddStep(std::make_shared(std::make_shared(intersection), "EF")); + columnsFetch = columnsFetch - intersection; + } + } + result->AddStep(std::make_shared(i)); + } + AFL_VERIFY(columnsFetch.IsEmpty()); + TColumnsSet columnsAdditionalFetch = *FFColumns - *EFColumns - *SpecColumns; + if (partialUsageByPredicate) { + columnsAdditionalFetch = columnsAdditionalFetch - *PredicateColumns; + } + if (columnsAdditionalFetch.GetColumnsCount()) { + result->AddStep(std::make_shared(std::make_shared(columnsAdditionalFetch))); + result->AddStep(std::make_shared(std::make_shared(columnsAdditionalFetch), "LAST")); + } + } else { + result->SetBranchName("merge"); + TColumnsSet columnsFetch = *MergeColumns + *EFColumns; + AFL_VERIFY(columnsFetch.GetColumnsCount()); + result->AddStep(std::make_shared(std::make_shared(columnsFetch))); + result->AddStep(std::make_shared(SpecColumns, "SPEC")); + if (needSnapshots) { + result->AddStep(std::make_shared()); + } + result->AddStep(std::make_shared(PKColumns, "PK")); + if 
(partialUsageByPredicate) { + result->AddStep(std::make_shared()); + } + TColumnsSet columnsFetchEF = columnsFetch - *SpecColumns - *PKColumns; + for (auto&& i : ReadMetadata->GetProgram().GetSteps()) { + if (!i->IsFilterOnly()) { + break; + } + TColumnsSet stepColumnIds(i->GetFilterOriginalColumnIds(), ReadMetadata->GetIndexInfo(), ReadMetadata->GetResultSchema()); + { + auto intersectionIds = columnsFetchEF.Intersect(stepColumnIds); + if (intersectionIds.size()) { + TColumnsSet intersection(intersectionIds, ReadMetadata->GetIndexInfo(), ReadMetadata->GetResultSchema()); + result->AddStep(std::make_shared(std::make_shared(intersection), "EF")); + columnsFetchEF = columnsFetchEF - intersection; + } + } + result->AddStep(std::make_shared(i)); + } + AFL_VERIFY(columnsFetchEF.IsEmpty()); + const TColumnsSet columnsAdditionalFetch = *FFColumns - *EFColumns - *SpecColumns - *PKColumns - *PredicateColumns; + if (columnsAdditionalFetch.GetColumnsCount()) { + result->AddStep(std::make_shared(std::make_shared(columnsAdditionalFetch))); + result->AddStep(std::make_shared(std::make_shared(columnsAdditionalFetch), "LAST")); + } + } + return result; +} + +TSpecialReadContext::TSpecialReadContext(const std::shared_ptr& commonContext) + : CommonContext(commonContext) +{ + ReadMetadata = dynamic_pointer_cast(CommonContext->GetReadMetadata()); + Y_ABORT_UNLESS(ReadMetadata); + Y_ABORT_UNLESS(ReadMetadata->SelectInfo); + + auto readSchema = ReadMetadata->GetResultSchema(); + SpecColumns = std::make_shared(TIndexInfo::GetSpecialColumnIdsSet(), ReadMetadata->GetIndexInfo(), readSchema); + IndexChecker = ReadMetadata->GetProgram().GetIndexChecker(); + { + auto predicateColumns = ReadMetadata->GetPKRangesFilter().GetColumnIds(ReadMetadata->GetIndexInfo()); + if (predicateColumns.size()) { + PredicateColumns = std::make_shared(predicateColumns, ReadMetadata->GetIndexInfo(), readSchema); + } else { + PredicateColumns = std::make_shared(); + } + } + { + auto efColumns = 
ReadMetadata->GetEarlyFilterColumnIds(); + if (efColumns.size()) { + EFColumns = std::make_shared(efColumns, ReadMetadata->GetIndexInfo(), readSchema); + } else { + EFColumns = std::make_shared(); + } + } + if (ReadMetadata->HasProcessingColumnIds()) { + FFColumns = std::make_shared(ReadMetadata->GetProcessingColumnIds(), ReadMetadata->GetIndexInfo(), readSchema); + if (SpecColumns->Contains(*FFColumns) && !EFColumns->IsEmpty()) { + FFColumns = std::make_shared(*EFColumns + *SpecColumns); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("ff_modified", FFColumns->DebugString()); + } else { + AFL_VERIFY(!FFColumns->Contains(*SpecColumns))("info", FFColumns->DebugString()); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("ff_first", FFColumns->DebugString()); + } + } else { + FFColumns = EFColumns; + } + if (FFColumns->IsEmpty()) { + ProgramInputColumns = SpecColumns; + } else { + ProgramInputColumns = FFColumns; + } + + PKColumns = std::make_shared(ReadMetadata->GetPKColumnIds(), ReadMetadata->GetIndexInfo(), readSchema); + MergeColumns = std::make_shared(*PKColumns + *SpecColumns); + + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("columns_context_info", DebugString()); + CacheFetchingScripts[0][0][0][0] = BuildColumnsFetchingPlan(false, false, false, false); + CacheFetchingScripts[0][1][0][0] = BuildColumnsFetchingPlan(false, true, false, false); + CacheFetchingScripts[1][0][0][0] = BuildColumnsFetchingPlan(true, false, false, false); + CacheFetchingScripts[1][1][0][0] = BuildColumnsFetchingPlan(true, true, false, false); + CacheFetchingScripts[0][0][1][0] = BuildColumnsFetchingPlan(false, false, true, false); + CacheFetchingScripts[0][1][1][0] = BuildColumnsFetchingPlan(false, true, true, false); + CacheFetchingScripts[1][0][1][0] = BuildColumnsFetchingPlan(true, false, true, false); + CacheFetchingScripts[1][1][1][0] = BuildColumnsFetchingPlan(true, true, true, false); + + CacheFetchingScripts[0][0][0][1] = BuildColumnsFetchingPlan(false, false, false, 
true); + CacheFetchingScripts[0][1][0][1] = BuildColumnsFetchingPlan(false, true, false, true); + CacheFetchingScripts[1][0][0][1] = BuildColumnsFetchingPlan(true, false, false, true); + CacheFetchingScripts[1][1][0][1] = BuildColumnsFetchingPlan(true, true, false, true); + CacheFetchingScripts[0][0][1][1] = BuildColumnsFetchingPlan(false, false, true, true); + CacheFetchingScripts[0][1][1][1] = BuildColumnsFetchingPlan(false, true, true, true); + CacheFetchingScripts[1][0][1][1] = BuildColumnsFetchingPlan(true, false, true, true); + CacheFetchingScripts[1][1][1][1] = BuildColumnsFetchingPlan(true, true, true, true); +} + +} diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/context.h b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/context.h new file mode 100644 index 000000000000..dc46c4b70b19 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/context.h @@ -0,0 +1,61 @@ +#pragma once +#include "columns_set.h" +#include "fetching.h" +#include +#include +#include +#include + +namespace NKikimr::NOlap::NReader::NPlain { + +class IDataSource; + +class TSpecialReadContext { +private: + YDB_READONLY_DEF(std::shared_ptr, CommonContext); + + YDB_READONLY_DEF(std::shared_ptr, SpecColumns); + YDB_READONLY_DEF(std::shared_ptr, MergeColumns); + YDB_READONLY_DEF(std::shared_ptr, EFColumns); + YDB_READONLY_DEF(std::shared_ptr, PredicateColumns); + YDB_READONLY_DEF(std::shared_ptr, PKColumns); + YDB_READONLY_DEF(std::shared_ptr, FFColumns); + YDB_READONLY_DEF(std::shared_ptr, ProgramInputColumns); + + NIndexes::TIndexCheckerContainer IndexChecker; + TReadMetadata::TConstPtr ReadMetadata; + std::shared_ptr EmptyColumns = std::make_shared(); + std::shared_ptr BuildColumnsFetchingPlan(const bool needSnapshotsFilter, const bool exclusiveSource, const bool partialUsageByPredicate, const bool useIndexes) const; + std::array, 2>, 2>, 2>, 2> CacheFetchingScripts; +public: + static const inline ui64 
DefaultRejectMemoryIntervalLimit = ((ui64)3) << 30; + static const inline ui64 DefaultReduceMemoryIntervalLimit = DefaultRejectMemoryIntervalLimit; + static const inline ui64 DefaultReadSequentiallyBufferSize = ((ui64)8) << 20; + + const ui64 ReduceMemoryIntervalLimit = NYDBTest::TControllers::GetColumnShardController()->GetReduceMemoryIntervalLimit(DefaultReduceMemoryIntervalLimit); + const ui64 RejectMemoryIntervalLimit = NYDBTest::TControllers::GetColumnShardController()->GetRejectMemoryIntervalLimit(DefaultRejectMemoryIntervalLimit); + const ui64 ReadSequentiallyBufferSize = DefaultReadSequentiallyBufferSize; + + ui64 GetMemoryForSources(const THashMap>& sources, const bool isExclusive); + + const TReadMetadata::TConstPtr& GetReadMetadata() const { + return ReadMetadata; + } + + std::unique_ptr BuildMerger() const; + + TString DebugString() const { + return TStringBuilder() << + "ef=" << EFColumns->DebugString() << ";" << + "pk=" << PKColumns->DebugString() << ";" << + "ff=" << FFColumns->DebugString() << ";" << + "program_input=" << ProgramInputColumns->DebugString() + ; + } + + TSpecialReadContext(const std::shared_ptr& commonContext); + + std::shared_ptr GetColumnsFetchingPlan(const std::shared_ptr& source) const; +}; + +} diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/fetched_data.cpp b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/fetched_data.cpp similarity index 53% rename from ydb/core/tx/columnshard/engines/reader/plain_reader/fetched_data.cpp rename to ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/fetched_data.cpp index d4f1a808b742..3b434b67cc50 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/fetched_data.cpp +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/fetched_data.cpp @@ -1,16 +1,17 @@ #include "fetched_data.h" #include #include +#include namespace NKikimr::NOlap { void TFetchedData::SyncTableColumns(const std::vector>& fields) { for (auto&& i : fields) { - if 
(Table->GetColumnByName(i->name())) { + if (Table->GetSchema()->GetFieldByName(i->name())) { continue; } - Table = NArrow::TStatusValidator::GetValid(Table->AddColumn(Table->num_columns(), i, - std::make_shared(NArrow::TThreadSimpleArraysCache::GetNull(i->type(), Table->num_rows())))); + Table->AddField(i, std::make_shared(NArrow::TThreadSimpleArraysCache::GetNull(i->type(), Table->num_rows()))) + .Validate(); } } diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/fetched_data.h b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/fetched_data.h similarity index 62% rename from ydb/core/tx/columnshard/engines/reader/plain_reader/fetched_data.h rename to ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/fetched_data.h index e31f51bd0dac..f1575baaf9cc 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/fetched_data.h +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/fetched_data.h @@ -2,7 +2,9 @@ #include #include #include +#include #include +#include #include #include #include @@ -11,9 +13,9 @@ namespace NKikimr::NOlap { class TFetchedData { protected: - using TBlobs = THashMap; + using TBlobs = THashMap; YDB_ACCESSOR_DEF(TBlobs, Blobs); - YDB_READONLY_DEF(std::shared_ptr, Table); + YDB_READONLY_DEF(std::shared_ptr, Table); YDB_READONLY_DEF(std::shared_ptr, Filter); YDB_READONLY(bool, UseFilter, false); public: @@ -33,8 +35,8 @@ class TFetchedData { return UseFilter ? 
nullptr : Filter; } - TString ExtractBlob(const TBlobRange& bRange) { - auto it = Blobs.find(bRange); + TString ExtractBlob(const TChunkAddress& address) { + auto it = Blobs.find(address); AFL_VERIFY(it != Blobs.end()); AFL_VERIFY(it->second.IsBlob()); auto result = it->second.GetData(); @@ -42,24 +44,20 @@ class TFetchedData { return result; } - void AddBlobs(THashMap&& blobs) { - for (auto&& i : blobs) { + void AddBlobs(THashMap&& blobData) { + for (auto&& i : blobData) { AFL_VERIFY(Blobs.emplace(i.first, std::move(i.second)).second); } } - void AddNulls(THashMap&& blobs) { + void AddNulls(THashMap&& blobs) { for (auto&& i : blobs) { AFL_VERIFY(Blobs.emplace(i.first, i.second).second); } } - bool IsEmptyFilter() const { - return Filter && Filter->IsTotalDenyFilter(); - } - bool IsEmpty() const { - return IsEmptyFilter() || (Table && !Table->num_rows()); + return (Filter && Filter->IsTotalDenyFilter()) || (Table && !Table->num_rows()); } void AddFilter(const std::shared_ptr& filter) { @@ -86,15 +84,30 @@ class TFetchedData { return AddBatch(arrow::Table::Make(batch->schema(), batch->columns(), batch->num_rows())); } + void AddBatch(const std::shared_ptr& table) { + AFL_VERIFY(table); + if (UseFilter) { + AddBatch(table->BuildTable()); + } else { + if (!Table) { + Table = table; + } else { + auto mergeResult = Table->MergeColumnsStrictly(*table); + AFL_VERIFY(mergeResult.IsSuccess())("error", mergeResult.GetErrorMessage()); + } + } + } + void AddBatch(const std::shared_ptr& table) { auto tableLocal = table; if (Filter && UseFilter) { AFL_VERIFY(Filter->Apply(tableLocal)); } if (!Table) { - Table = tableLocal; + Table = std::make_shared(tableLocal); } else { - AFL_VERIFY(NArrow::MergeBatchColumns({Table, tableLocal}, Table)); + auto mergeResult = Table->MergeColumnsStrictly(NArrow::TGeneralContainer(tableLocal)); + AFL_VERIFY(mergeResult.IsSuccess())("error", mergeResult.GetErrorMessage()); } } @@ -102,14 +115,16 @@ class TFetchedData { class TFetchedResult { 
private: - YDB_READONLY_DEF(std::shared_ptr, Batch); + YDB_READONLY_DEF(std::shared_ptr, Batch); YDB_READONLY_DEF(std::shared_ptr, NotAppliedFilter); public: TFetchedResult(std::unique_ptr&& data) - : NotAppliedFilter(data->GetNotAppliedFilter()) { - if (data->GetTable()) { - Batch = NArrow::ToBatch(data->GetTable(), true); - } + : Batch(data->GetTable()) + , NotAppliedFilter(data->GetNotAppliedFilter()) { + } + + bool IsEmpty() const { + return !Batch || Batch->num_rows() == 0 || (NotAppliedFilter && NotAppliedFilter->IsTotalDenyFilter()); } }; diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/fetching.cpp b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/fetching.cpp new file mode 100644 index 000000000000..daef4343318e --- /dev/null +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/fetching.cpp @@ -0,0 +1,141 @@ +#include "fetching.h" +#include "source.h" +#include +#include + +#include + +namespace NKikimr::NOlap::NReader::NPlain { + +bool TStepAction::DoApply(IDataReader& /*owner*/) const { + if (FinishedFlag) { + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "apply"); + Source->SetIsReady(); + } + return true; +} + +bool TStepAction::DoExecute() { + if (Source->IsAborted()) { + return true; + } + auto executeResult = Cursor.Execute(Source); + if (!executeResult) { + SetErrorMessage(executeResult.GetErrorMessage()); + return false; + } + if (*executeResult) { + Source->Finalize(); + FinishedFlag = true; + } + return true; +} + +TConclusion TColumnBlobsFetchingStep::DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& step) const { + return !source->StartFetchingColumns(source, step, Columns); +} + +ui64 TColumnBlobsFetchingStep::DoPredictRawBytes(const std::shared_ptr& source) const { + const ui64 result = source->GetColumnRawBytes(Columns->GetColumnIds()); + if (!result) { + return Columns->GetColumnIds().size() * source->GetRecordsCount() * sizeof(ui32); // null for all 
records for all columns in future will be + } else { + return result; + } +} + +TConclusion TIndexBlobsFetchingStep::DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& step) const { + return !source->StartFetchingIndexes(source, step, Indexes); +} + +ui64 TIndexBlobsFetchingStep::DoPredictRawBytes(const std::shared_ptr& source) const { + return source->GetIndexRawBytes(Indexes->GetIndexIdsSet()); +} + +TConclusion TAssemblerStep::DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& /*step*/) const { + source->AssembleColumns(Columns); + return true; +} + +TConclusion TOptionalAssemblerStep::DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& /*step*/) const { + source->AssembleColumns(Columns); + return true; +} + +bool TOptionalAssemblerStep::DoInitSourceSeqColumnIds(const std::shared_ptr& source) const { + for (auto&& i : Columns->GetColumnIds()) { + if (source->AddSequentialEntityIds(i)) { + return true; + } + } + return false; +} + +TConclusion TFilterProgramStep::DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& /*step*/) const { + AFL_VERIFY(source); + AFL_VERIFY(Step); + std::shared_ptr table; + if (source->IsSourceInMemory(Step->GetFilterOriginalColumnIds())) { + auto filter = Step->BuildFilter(source->GetStageData().GetTable()); + if (!filter.ok()) { + return TConclusionStatus::Fail(filter.status().message()); + } + source->MutableStageData().AddFilter(*filter); + } + return true; +} + +ui64 TFilterProgramStep::DoPredictRawBytes(const std::shared_ptr& source) const { + return NArrow::TColumnFilter::GetPredictedMemorySize(source->GetRecordsCount()); +} + +TConclusion TPredicateFilter::DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& /*step*/) const { + auto filter = source->GetContext()->GetReadMetadata()->GetPKRangesFilter().BuildFilter(source->GetStageData().GetTable()->BuildTable()); + 
source->MutableStageData().AddFilter(filter); + return true; +} + +TConclusion TSnapshotFilter::DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& /*step*/) const { + auto filter = MakeSnapshotFilter(source->GetStageData().GetTable()->BuildTable(), source->GetContext()->GetReadMetadata()->GetRequestSnapshot()); + source->MutableStageData().AddFilter(filter); + return true; +} + +TConclusion TBuildFakeSpec::DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& /*step*/) const { + std::vector> columns; + for (auto&& f : TIndexInfo::ArrowSchemaSnapshot()->fields()) { + columns.emplace_back(NArrow::TThreadSimpleArraysCache::GetConst(f->type(), std::make_shared(0), Count)); + } + source->MutableStageData().AddBatch(arrow::RecordBatch::Make(TIndexInfo::ArrowSchemaSnapshot(), Count, columns)); + return true; +} + +TConclusion TApplyIndexStep::DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& /*step*/) const { + source->ApplyIndex(IndexChecker); + return true; +} + +TConclusion TFetchingScriptCursor::Execute(const std::shared_ptr& source) { + AFL_VERIFY(source); + NMiniKQL::TThrowingBindTerminator bind; + AFL_VERIFY(!Script->IsFinished(CurrentStepIdx)); + while (!Script->IsFinished(CurrentStepIdx)) { + if (source->GetStageData().IsEmpty()) { + break; + } + auto step = Script->GetStep(CurrentStepIdx); + TMemoryProfileGuard mGuard("SCAN_PROFILE::FETCHING::" + step->GetName() + "::" + Script->GetBranchName(), IS_DEBUG_LOG_ENABLED(NKikimrServices::TX_COLUMNSHARD_SCAN_MEMORY)); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("scan_step", step->DebugString())("scan_step_idx", CurrentStepIdx); + const TConclusion resultStep = step->ExecuteInplace(source, *this); + if (!resultStep) { + return resultStep; + } + if (!*resultStep) { + return false; + } + ++CurrentStepIdx; + } + return true; +} + +} diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/fetching.h 
b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/fetching.h new file mode 100644 index 000000000000..1d0a1ceee8f0 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/fetching.h @@ -0,0 +1,294 @@ +#pragma once +#include "columns_set.h" +#include +#include +#include +#include +#include + +namespace NKikimr::NOlap::NReader::NPlain { +class IDataSource; +class TFetchingScriptCursor; +class IFetchingStep { +private: + YDB_READONLY_DEF(TString, Name); +protected: + virtual TConclusion DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& step) const = 0; + virtual TString DoDebugString() const { + return ""; + } +public: + virtual ui64 DoPredictRawBytes(const std::shared_ptr& /*source*/) const { + return 0; + } + virtual bool DoInitSourceSeqColumnIds(const std::shared_ptr& /*source*/) const { + return false; + } + + virtual ~IFetchingStep() = default; + + [[nodiscard]] TConclusion ExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& step) const { + return DoExecuteInplace(source, step); + } + + IFetchingStep(const TString& name) + : Name(name) + { + + } + + TString DebugString() const { + TStringBuilder sb; + sb << "name=" << Name << ";details={" << DoDebugString() << "};"; + return sb; + } +}; + +class TFetchingScript { +private: + YDB_ACCESSOR(TString, BranchName, "UNDEFINED"); + std::vector> Steps; +public: + TFetchingScript() = default; + + TString DebugString() const { + TStringBuilder sb; + sb << "["; + for (auto&& i : Steps) { + sb << "{" << i->DebugString() << "};"; + } + sb << "]"; + return sb; + } + + const std::shared_ptr& GetStep(const ui32 index) const { + AFL_VERIFY(index < Steps.size()); + return Steps[index]; + } + + ui64 PredictRawBytes(const std::shared_ptr& source) const { + ui64 result = 0; + for (auto&& current: Steps) { + result += current->DoPredictRawBytes(source); + } + return result; + } + + void AddStep(const std::shared_ptr& step) { + AFL_VERIFY(step); + 
Steps.emplace_back(step); + } + + bool InitSourceSeqColumnIds(const std::shared_ptr& source) const { + for (auto it = Steps.rbegin(); it != Steps.rend(); ++it) { + if ((*it)->DoInitSourceSeqColumnIds(source)) { + return true; + } + } + return false; + } + + bool IsFinished(const ui32 currentStepIdx) const { + AFL_VERIFY(currentStepIdx <= Steps.size()); + return currentStepIdx == Steps.size(); + } + + ui32 Execute(const ui32 startStepIdx, const std::shared_ptr& source) const; +}; + +class TFetchingScriptCursor { +private: + ui32 CurrentStepIdx = 0; + std::shared_ptr Script; +public: + TFetchingScriptCursor(const std::shared_ptr& script, const ui32 index) + : CurrentStepIdx(index) + , Script(script) + { + + } + + const TString& GetName() const { + return Script->GetStep(CurrentStepIdx)->GetName(); + } + + TString DebugString() const { + return Script->GetStep(CurrentStepIdx)->DebugString(); + } + + bool Next() { + return !Script->IsFinished(++CurrentStepIdx); + } + + TConclusion Execute(const std::shared_ptr& source); +}; + +class TStepAction: public IDataTasksProcessor::ITask { +private: + using TBase = IDataTasksProcessor::ITask; + std::shared_ptr Source; + TFetchingScriptCursor Cursor; + bool FinishedFlag = false; +protected: + virtual bool DoApply(IDataReader& owner) const override; + virtual bool DoExecute() override; +public: + virtual TString GetTaskClassIdentifier() const override { + return "STEP_ACTION"; + } + + TStepAction(const std::shared_ptr& source, TFetchingScriptCursor&& cursor, const NActors::TActorId& ownerActorId) + : TBase(ownerActorId) + , Source(source) + , Cursor(std::move(cursor)) + { + + } +}; + +class TBuildFakeSpec: public IFetchingStep { +private: + using TBase = IFetchingStep; + const ui32 Count = 0; +protected: + virtual TConclusion DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& step) const override; + virtual ui64 DoPredictRawBytes(const std::shared_ptr& /*source*/) const override { + return 
TIndexInfo::GetSpecialColumnsRecordSize() * Count; + } +public: + TBuildFakeSpec(const ui32 count) + : TBase("FAKE_SPEC") + , Count(count) + { + AFL_VERIFY(Count); + } +}; + +class TApplyIndexStep: public IFetchingStep { +private: + using TBase = IFetchingStep; + const NIndexes::TIndexCheckerContainer IndexChecker; +protected: + virtual TConclusion DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& step) const override; +public: + TApplyIndexStep(const NIndexes::TIndexCheckerContainer& indexChecker) + : TBase("APPLY_INDEX") + , IndexChecker(indexChecker) + { + + } +}; + +class TColumnBlobsFetchingStep: public IFetchingStep { +private: + using TBase = IFetchingStep; + std::shared_ptr Columns; +protected: + virtual TConclusion DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& step) const override; + virtual ui64 DoPredictRawBytes(const std::shared_ptr& source) const override; + virtual TString DoDebugString() const override { + return TStringBuilder() << "columns=" << Columns->DebugString() << ";"; + } +public: + TColumnBlobsFetchingStep(const std::shared_ptr& columns) + : TBase("FETCHING_COLUMNS") + , Columns(columns) { + AFL_VERIFY(Columns); + AFL_VERIFY(Columns->GetColumnsCount()); + } +}; + +class TIndexBlobsFetchingStep: public IFetchingStep { +private: + using TBase = IFetchingStep; + std::shared_ptr Indexes; +protected: + virtual TConclusion DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& step) const override; + virtual ui64 DoPredictRawBytes(const std::shared_ptr& source) const override; + virtual TString DoDebugString() const override { + return TStringBuilder() << "indexes=" << Indexes->DebugString() << ";"; + } +public: + TIndexBlobsFetchingStep(const std::shared_ptr& indexes) + : TBase("FETCHING_INDEXES") + , Indexes(indexes) { + AFL_VERIFY(Indexes); + AFL_VERIFY(Indexes->GetIndexesCount()); + } +}; + +class TAssemblerStep: public IFetchingStep { +private: + using TBase = 
IFetchingStep; + YDB_READONLY_DEF(std::shared_ptr, Columns); + virtual TString DoDebugString() const override { + return TStringBuilder() << "columns=" << Columns->DebugString() << ";"; + } +public: + virtual TConclusion DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& step) const override; + TAssemblerStep(const std::shared_ptr& columns, const TString& specName = Default()) + : TBase("ASSEMBLER" + (specName ? "::" + specName : "")) + , Columns(columns) + { + AFL_VERIFY(Columns); + AFL_VERIFY(Columns->GetColumnsCount()); + } +}; + +class TOptionalAssemblerStep: public IFetchingStep { +private: + using TBase = IFetchingStep; + YDB_READONLY_DEF(std::shared_ptr, Columns); + virtual TString DoDebugString() const override { + return TStringBuilder() << "columns=" << Columns->DebugString() << ";"; + } +protected: + virtual bool DoInitSourceSeqColumnIds(const std::shared_ptr& source) const override; +public: + virtual TConclusion DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& step) const override; + TOptionalAssemblerStep(const std::shared_ptr& columns, const TString& specName = Default()) + : TBase("OPTIONAL_ASSEMBLER" + (specName ? 
"::" + specName : "")) + , Columns(columns) { + AFL_VERIFY(Columns); + AFL_VERIFY(Columns->GetColumnsCount()); + } +}; + +class TFilterProgramStep: public IFetchingStep { +private: + using TBase = IFetchingStep; + std::shared_ptr Step; +protected: + virtual ui64 DoPredictRawBytes(const std::shared_ptr& source) const override; +public: + virtual TConclusion DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& step) const override; + TFilterProgramStep(const std::shared_ptr& step) + : TBase("PROGRAM") + , Step(step) + { + } +}; + +class TPredicateFilter: public IFetchingStep { +private: + using TBase = IFetchingStep; +public: + virtual TConclusion DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& step) const override; + TPredicateFilter() + : TBase("PREDICATE") { + + } +}; + +class TSnapshotFilter: public IFetchingStep { +private: + using TBase = IFetchingStep; +public: + virtual TConclusion DoExecuteInplace(const std::shared_ptr& source, const TFetchingScriptCursor& step) const override; + TSnapshotFilter() + : TBase("SNAPSHOT") { + + } +}; + +} diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/interval.cpp b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/interval.cpp new file mode 100644 index 000000000000..8e228937b653 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/interval.cpp @@ -0,0 +1,94 @@ +#include "interval.h" +#include + +namespace NKikimr::NOlap::NReader::NPlain { + +void TFetchingInterval::ConstructResult() { + if (ReadySourcesCount.Val() != WaitSourcesCount || !ReadyGuards.Val()) { + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "skip_construct_result")("interval_idx", IntervalIdx); + return; + } else { + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "start_construct_result")("interval_idx", IntervalIdx); + } + if (AtomicCas(&SourcesFinalized, 1, 0)) { + 
IntervalStateGuard.SetStatus(NColumnShard::TScanCounters::EIntervalStatus::WaitMergerStart); + auto task = std::make_shared(MergingContext, Context, std::move(Sources)); + task->SetPriority(NConveyor::ITask::EPriority::High); + NConveyor::TScanServiceOperator::SendTaskToExecute(task); + } +} + +void TFetchingInterval::OnInitResourcesGuard(const std::shared_ptr& guard) { + IntervalStateGuard.SetStatus(NColumnShard::TScanCounters::EIntervalStatus::WaitSources); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "allocated")("interval_idx", IntervalIdx); + AFL_VERIFY(guard); + AFL_VERIFY(!ResourcesGuard); + ResourcesGuard = guard; + for (auto&& i : Sources) { + i.second->OnInitResourcesGuard(i.second); + } + AFL_VERIFY(ReadyGuards.Inc() <= 1); + ConstructResult(); +} + +void TFetchingInterval::OnSourceFetchStageReady(const ui32 /*sourceIdx*/) { + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "fetched")("interval_idx", IntervalIdx); + AFL_VERIFY(ReadySourcesCount.Inc() <= WaitSourcesCount); + ConstructResult(); +} + +TFetchingInterval::TFetchingInterval(const NArrow::NMerger::TSortableBatchPosition& start, const NArrow::NMerger::TSortableBatchPosition& finish, + const ui32 intervalIdx, const THashMap>& sources, const std::shared_ptr& context, + const bool includeFinish, const bool includeStart, const bool isExclusiveInterval) + : TTaskBase(0, context->GetMemoryForSources(sources, isExclusiveInterval), "", context->GetCommonContext()->GetResourcesTaskContext()) + , MergingContext(std::make_shared(start, finish, intervalIdx, includeFinish, includeStart, isExclusiveInterval)) + , Context(context) + , TaskGuard(Context->GetCommonContext()->GetCounters().GetResourcesAllocationTasksGuard()) + , Sources(sources) + , IntervalIdx(intervalIdx) + , IntervalStateGuard(Context->GetCommonContext()->GetCounters().CreateIntervalStateGuard()) +{ + Y_ABORT_UNLESS(Sources.size()); + for (auto&& [_, i] : Sources) { + if (!i->IsDataReady()) { + ++WaitSourcesCount; + } + 
i->RegisterInterval(*this); + } + IntervalStateGuard.SetStatus(NColumnShard::TScanCounters::EIntervalStatus::WaitResources); +} + +void TFetchingInterval::DoOnAllocationSuccess(const std::shared_ptr& guard) { + AFL_VERIFY(guard); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("interval_idx", IntervalIdx)("event", "resources_allocated") + ("resources", guard->DebugString())("start", MergingContext->GetIncludeStart())("finish", MergingContext->GetIncludeFinish())("sources", Sources.size()); + OnInitResourcesGuard(guard); +} + +void TFetchingInterval::SetMerger(std::unique_ptr&& merger) { + AFL_VERIFY(!Merger); + AFL_VERIFY(AtomicCas(&PartSendingWait, 1, 0)); + if (merger) { + IntervalStateGuard.SetStatus(NColumnShard::TScanCounters::EIntervalStatus::WaitPartialReply); + } + Merger = std::move(merger); +} + +bool TFetchingInterval::HasMerger() const { + return !!Merger; +} + +void TFetchingInterval::OnPartSendingComplete() { + AFL_VERIFY(Merger); + AFL_VERIFY(AtomicCas(&PartSendingWait, 0, 1)); + AFL_VERIFY(AtomicGet(SourcesFinalized) == 1); + if (AbortedFlag) { + return; + } + IntervalStateGuard.SetStatus(NColumnShard::TScanCounters::EIntervalStatus::WaitMergerContinue); + auto task = std::make_shared(MergingContext, Context, std::move(Merger)); + task->SetPriority(NConveyor::ITask::EPriority::High); + NConveyor::TScanServiceOperator::SendTaskToExecute(task); +} + +} diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/interval.h b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/interval.h new file mode 100644 index 000000000000..80613ef5b2d2 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/interval.h @@ -0,0 +1,92 @@ +#pragma once +#include "source.h" +#include "merge.h" + +#include + +namespace NKikimr::NOlap::NReader::NPlain { + +class TFetchingInterval: public TNonCopyable, public NResourceBroker::NSubscribe::ITask { +private: + using TTaskBase = NResourceBroker::NSubscribe::ITask; + 
std::shared_ptr MergingContext; + bool AbortedFlag = false; + TAtomic SourcesFinalized = 0; + TAtomic PartSendingWait = 0; + std::unique_ptr Merger; + std::shared_ptr Context; + NColumnShard::TCounterGuard TaskGuard; + THashMap> Sources; + + void ConstructResult(); + + std::shared_ptr ResourcesGuard; + const ui32 IntervalIdx; + TAtomicCounter ReadySourcesCount = 0; + TAtomicCounter ReadyGuards = 0; + ui32 WaitSourcesCount = 0; + NColumnShard::TConcreteScanCounters::TScanIntervalStateGuard IntervalStateGuard; + void OnInitResourcesGuard(const std::shared_ptr& guard); +protected: + virtual void DoOnAllocationSuccess(const std::shared_ptr& guard) override; + +public: + std::set GetPathIds() const { + std::set result; + for (auto&& i : Sources) { + result.emplace(i.second->GetPathId()); + } + return result; + } + + ui32 GetIntervalIdx() const { + return IntervalIdx; + } + + const THashMap>& GetSources() const { + return Sources; + } + + const std::shared_ptr& GetResourcesGuard() const { + return ResourcesGuard; + } + + void Abort() { + AbortedFlag = true; + if (AtomicCas(&SourcesFinalized, 1, 0)) { + for (auto&& i : Sources) { + i.second->Abort(); + } + } + } + + NJson::TJsonValue DebugJson() const { + NJson::TJsonValue result = NJson::JSON_MAP; + result.InsertValue("merging_context", MergingContext ? 
MergingContext->DebugJson() : ""); + auto& jsonSources = result.InsertValue("sources", NJson::JSON_ARRAY); + for (auto&& [_, i] : Sources) { + jsonSources.AppendValue(i->DebugJson()); + } + return result; + } + + NJson::TJsonValue DebugJsonForMemory() const { + NJson::TJsonValue result = NJson::JSON_MAP; + auto& jsonSources = result.InsertValue("sources", NJson::JSON_ARRAY); + for (auto&& [_, i] : Sources) { + jsonSources.AppendValue(i->DebugJsonForMemory()); + } + return result; + } + + void OnSourceFetchStageReady(const ui32 sourceIdx); + void OnPartSendingComplete(); + void SetMerger(std::unique_ptr&& merger); + bool HasMerger() const; + + TFetchingInterval(const NArrow::NMerger::TSortableBatchPosition& start, const NArrow::NMerger::TSortableBatchPosition& finish, + const ui32 intervalIdx, const THashMap>& sources, const std::shared_ptr& context, + const bool includeFinish, const bool includeStart, const bool isExclusiveInterval); +}; + +} diff --git a/ydb/core/tx/columnshard/columnshard__index_scan.cpp b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/iterator.cpp similarity index 64% rename from ydb/core/tx/columnshard/columnshard__index_scan.cpp rename to ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/iterator.cpp index a2d4dc7bb809..60e41095303c 100644 --- a/ydb/core/tx/columnshard/columnshard__index_scan.cpp +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/iterator.cpp @@ -1,23 +1,17 @@ -#include "columnshard__index_scan.h" -#include -#include +#include "iterator.h" -namespace NKikimr::NColumnShard { +namespace NKikimr::NOlap::NReader::NPlain { -TColumnShardScanIterator::TColumnShardScanIterator(const std::shared_ptr& context, const NOlap::TReadMetadata::TConstPtr& readMetadata) +TColumnShardScanIterator::TColumnShardScanIterator(const std::shared_ptr& context, const TReadMetadata::TConstPtr& readMetadata) : Context(context) , ReadMetadata(readMetadata) , ReadyResults(context->GetCounters()) { IndexedData = 
readMetadata->BuildReader(Context); Y_ABORT_UNLESS(Context->GetReadMetadata()->IsSorted()); - - if (readMetadata->Empty()) { - IndexedData->Abort(); - } } -std::optional TColumnShardScanIterator::GetBatch() { +TConclusion> TColumnShardScanIterator::GetBatch() { FillReadyResults(); return ReadyResults.pop_front(); } @@ -26,10 +20,14 @@ void TColumnShardScanIterator::PrepareResults() { FillReadyResults(); } -bool TColumnShardScanIterator::ReadNextInterval() { +TConclusion TColumnShardScanIterator::ReadNextInterval() { return IndexedData->ReadNextInterval(); } +void TColumnShardScanIterator::DoOnSentDataFromInterval(const ui32 intervalIdx) const { + return IndexedData->OnSentDataFromInterval(intervalIdx); +} + void TColumnShardScanIterator::FillReadyResults() { auto ready = IndexedData->ExtractReadyResults(MaxRowsInBatch); i64 limitLeft = Context->GetReadMetadata()->Limit == 0 ? INT64_MAX : Context->GetReadMetadata()->Limit - ItemsRead; @@ -43,13 +41,15 @@ void TColumnShardScanIterator::FillReadyResults() { } if (limitLeft == 0) { - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "abort_scan")("limit", Context->GetReadMetadata()->Limit)("ready", ItemsRead); - IndexedData->Abort(); + AFL_NOTICE(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "limit_reached_on_scan")("limit", Context->GetReadMetadata()->Limit)("ready", ItemsRead); + IndexedData->Abort("records count limit exhausted"); } } TColumnShardScanIterator::~TColumnShardScanIterator() { - IndexedData->Abort(); + if (!IndexedData->IsFinished()) { + IndexedData->Abort("iterator destructor"); + } ReadMetadata->ReadStats->PrintToLog(); } diff --git a/ydb/core/tx/columnshard/columnshard__index_scan.h b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/iterator.h similarity index 54% rename from ydb/core/tx/columnshard/columnshard__index_scan.h rename to ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/iterator.h index 2d3d7e96e035..38f05ff276cd 100644 --- 
a/ydb/core/tx/columnshard/columnshard__index_scan.h +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/iterator.h @@ -1,44 +1,15 @@ #pragma once +#include +#include +#include +#include -#include "columnshard__scan.h" -#include "columnshard_common.h" -#include "engines/reader/read_metadata.h" -#include "engines/reader/read_context.h" - -namespace NKikimr::NColumnShard { - -class TIndexColumnResolver : public IColumnResolver { - const NOlap::TIndexInfo& IndexInfo; - -public: - explicit TIndexColumnResolver(const NOlap::TIndexInfo& indexInfo) - : IndexInfo(indexInfo) - {} - - virtual std::optional GetColumnIdOptional(const TString& name) const override { - return IndexInfo.GetColumnIdOptional(name); - } - - TString GetColumnName(ui32 id, bool required) const override { - return IndexInfo.GetColumnName(id, required); - } - - const NTable::TScheme::TTableSchema& GetSchema() const override { - return IndexInfo; - } - - NSsa::TColumnInfo GetDefaultColumn() const override { - return NSsa::TColumnInfo::Original((ui32)NOlap::TIndexInfo::ESpecialColumn::PLAN_STEP, NOlap::TIndexInfo::SPEC_COL_PLAN_STEP); - } -}; - -using NOlap::TUnifiedBlobId; -using NOlap::TBlobRange; +namespace NKikimr::NOlap::NReader::NPlain { class TReadyResults { private: const NColumnShard::TConcreteScanCounters Counters; - std::deque Data; + std::deque Data; i64 RecordsCount = 0; public: TString DebugString() const { @@ -57,12 +28,12 @@ class TReadyResults { { } - NOlap::TPartialReadResult& emplace_back(NOlap::TPartialReadResult&& v) { + TPartialReadResult& emplace_back(TPartialReadResult&& v) { RecordsCount += v.GetResultBatch().num_rows(); Data.emplace_back(std::move(v)); return Data.back(); } - std::optional pop_front() { + std::optional pop_front() { if (Data.empty()) { return {}; } @@ -81,21 +52,28 @@ class TReadyResults { class TColumnShardScanIterator: public TScanIteratorBase { private: - std::shared_ptr Context; - const NOlap::TReadMetadata::TConstPtr ReadMetadata; + 
std::shared_ptr Context; + const TReadMetadata::TConstPtr ReadMetadata; TReadyResults ReadyResults; - std::shared_ptr IndexedData; + std::shared_ptr IndexedData; ui64 ItemsRead = 0; const i64 MaxRowsInBatch = 5000; + virtual void DoOnSentDataFromInterval(const ui32 intervalIdx) const override; + public: - TColumnShardScanIterator(const std::shared_ptr& context, const NOlap::TReadMetadata::TConstPtr& readMetadata); + TColumnShardScanIterator(const std::shared_ptr& context, const TReadMetadata::TConstPtr& readMetadata); ~TColumnShardScanIterator(); + virtual TConclusionStatus Start() override { + AFL_VERIFY(IndexedData); + return IndexedData->Start(); + } + virtual std::optional GetAvailableResultsCount() const override { return ReadyResults.size(); } - virtual const NOlap::TReadStats& GetStats() const override { + virtual const TReadStats& GetStats() const override { return *ReadMetadata->ReadStats; } @@ -112,10 +90,10 @@ class TColumnShardScanIterator: public TScanIteratorBase { return IndexedData->IsFinished() && ReadyResults.empty(); } - std::optional GetBatch() override; + TConclusion> GetBatch() override; virtual void PrepareResults() override; - virtual bool ReadNextInterval() override; + virtual TConclusion ReadNextInterval() override; private: void FillReadyResults(); diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/merge.cpp b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/merge.cpp new file mode 100644 index 000000000000..ec6c4c04163b --- /dev/null +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/merge.cpp @@ -0,0 +1,157 @@ +#include "merge.h" +#include "plain_read_data.h" +#include "source.h" + +namespace NKikimr::NOlap::NReader::NPlain { + +std::optional TBaseMergeTask::DrainMergerLinearScan(const std::optional resultBufferLimit) { + std::optional lastResultPosition; + AFL_VERIFY(!ResultBatch); + auto rbBuilder = std::make_shared(Context->GetProgramInputColumns()->GetSchema()->fields()); + 
rbBuilder->SetMemoryBufferLimit(resultBufferLimit); + if (!Merger->DrainToControlPoint(*rbBuilder, MergingContext->GetIncludeFinish(), &lastResultPosition)) { + if (Merger->IsEmpty()) { + Merger = nullptr; + } else { + AFL_VERIFY(rbBuilder->IsBufferExhausted()); + } + } else { + Merger = nullptr; + } + Context->GetCommonContext()->GetCounters().OnLinearScanInterval(rbBuilder->GetRecordsCount()); + ResultBatch = NArrow::TStatusValidator::GetValid(arrow::Table::FromRecordBatches({rbBuilder->Finalize()})); + return lastResultPosition; +} + +void TBaseMergeTask::PrepareResultBatch() { + if (!ResultBatch || ResultBatch->num_rows() == 0) { + ResultBatch = nullptr; + LastPK = nullptr; + return; + } + { + ResultBatch = NArrow::ExtractColumns(ResultBatch, Context->GetProgramInputColumns()->GetColumnNamesVector()); + AFL_VERIFY(ResultBatch); + AFL_VERIFY((ui32)ResultBatch->num_columns() == Context->GetProgramInputColumns()->GetColumnNamesVector().size()); + NArrow::TStatusValidator::Validate(Context->GetReadMetadata()->GetProgram().ApplyProgram(ResultBatch)); + } + if (ResultBatch->num_rows()) { + const auto& shardingPolicy = Context->GetCommonContext()->GetComputeShardingPolicy(); + if (NArrow::THashConstructor::BuildHashUI64(ResultBatch, shardingPolicy.GetColumnNames(), "__compute_sharding_hash")) { + ShardedBatch = NArrow::TShardingSplitIndex::Apply(shardingPolicy.GetShardsCount(), ResultBatch, "__compute_sharding_hash"); + } else { + ShardedBatch = NArrow::TShardedRecordBatch(ResultBatch); + } + AFL_VERIFY(!!LastPK == !!ShardedBatch->GetRecordsCount())("lpk", !!LastPK)("sb", ShardedBatch->GetRecordsCount()); + } else { + ResultBatch = nullptr; + LastPK = nullptr; + } +} + +bool TBaseMergeTask::DoApply(IDataReader& indexedDataRead) const { + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "DoApply")("interval_idx", MergingContext->GetIntervalIdx()); + auto& reader = static_cast(indexedDataRead); + reader.MutableScanner().OnIntervalResult(ShardedBatch, LastPK, 
std::move(Merger), IntervalIdx, reader); + return true; +} + +bool TStartMergeTask::DoExecute() { + if (OnlyEmptySources) { + ResultBatch = nullptr; + return true; + } + bool sourcesInMemory = true; + for (auto&& i : Sources) { + if (!i.second->IsSourceInMemory()) { + sourcesInMemory = false; + break; + } + } + if (MergingContext->IsExclusiveInterval() && sourcesInMemory) { + TMemoryProfileGuard mGuard("SCAN_PROFILE::MERGE::EXCLUSIVE", IS_DEBUG_LOG_ENABLED(NKikimrServices::TX_COLUMNSHARD_SCAN_MEMORY)); + auto& container = Sources.begin()->second->GetStageResult().GetBatch(); + if (container && container->num_rows()) { + ResultBatch = container->BuildTable(); + LastPK = Sources.begin()->second->GetLastPK(); + ResultBatch = NArrow::ExtractColumnsValidate(ResultBatch, Context->GetProgramInputColumns()->GetColumnNamesVector()); + AFL_VERIFY(ResultBatch)("info", Context->GetProgramInputColumns()->GetSchema()->ToString()); + Context->GetCommonContext()->GetCounters().OnNoScanInterval(ResultBatch->num_rows()); + if (Context->GetCommonContext()->IsReverse()) { + ResultBatch = NArrow::ReverseRecords(ResultBatch); + } + PrepareResultBatch(); + } + Sources.clear(); + AFL_VERIFY(!!LastPK == (!!ResultBatch && ResultBatch->num_rows())); + return true; + } + TMemoryProfileGuard mGuard("SCAN_PROFILE::MERGE::COMMON", IS_DEBUG_LOG_ENABLED(NKikimrServices::TX_COLUMNSHARD_SCAN_MEMORY)); + AFL_VERIFY(!Merger); + Merger = Context->BuildMerger(); + for (auto&& [_, i] : Sources) { + if (auto rb = i->GetStageResult().GetBatch()) { + Merger->AddSource(rb, i->GetStageResult().GetNotAppliedFilter()); + } + } + AFL_VERIFY(Merger->GetSourcesCount() <= Sources.size()); + if (Merger->GetSourcesCount() == 0) { + ResultBatch = nullptr; + return true; + } + Merger->PutControlPoint(std::make_shared(MergingContext->GetFinish())); + Merger->SkipToLowerBound(MergingContext->GetStart(), MergingContext->GetIncludeStart()); + const ui32 originalSourcesCount = Sources.size(); + Sources.clear(); + + 
AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "DoExecute")("interval_idx", MergingContext->GetIntervalIdx()); + std::optional lastResultPosition; + if (Merger->GetSourcesCount() == 1 && sourcesInMemory) { + TMemoryProfileGuard mGuard("SCAN_PROFILE::MERGE::ONE", IS_DEBUG_LOG_ENABLED(NKikimrServices::TX_COLUMNSHARD_SCAN_MEMORY)); + ResultBatch = Merger->SingleSourceDrain(MergingContext->GetFinish(), MergingContext->GetIncludeFinish(), &lastResultPosition); + if (ResultBatch) { + Context->GetCommonContext()->GetCounters().OnLogScanInterval(ResultBatch->num_rows()); + AFL_VERIFY(ResultBatch->schema()->Equals(Context->GetProgramInputColumns()->GetSchema()))("res", ResultBatch->schema()->ToString())("ctx", Context->GetProgramInputColumns()->GetSchema()->ToString()); + } + if (MergingContext->GetIncludeFinish() && originalSourcesCount == 1) { + AFL_VERIFY(Merger->IsEmpty())("merging_context_finish", MergingContext->GetFinish().DebugJson().GetStringRobust())("merger", Merger->DebugString()); + } + } else { + TMemoryProfileGuard mGuard("SCAN_PROFILE::MERGE::MANY", IS_DEBUG_LOG_ENABLED(NKikimrServices::TX_COLUMNSHARD_SCAN_MEMORY)); + const std::optional bufferLimit = sourcesInMemory ? 
std::nullopt : std::optional(Context->ReadSequentiallyBufferSize); + lastResultPosition = DrainMergerLinearScan(bufferLimit); + } + if (lastResultPosition) { + LastPK = lastResultPosition->ExtractSortingPosition(); + } + AFL_VERIFY(!!LastPK == (!!ResultBatch && ResultBatch->num_rows())); + PrepareResultBatch(); + return true; +} + +TStartMergeTask::TStartMergeTask(const std::shared_ptr& mergingContext, const std::shared_ptr& readContext, THashMap>&& sources) + : TBase(mergingContext, readContext) + , Sources(std::move(sources)) +{ + for (auto&& s : Sources) { + AFL_VERIFY(s.second->IsDataReady()); + } + for (auto&& [_, i] : Sources) { + if (!i->GetStageResult().IsEmpty()) { + OnlyEmptySources = false; + } + } +} + +bool TContinueMergeTask::DoExecute() { + TMemoryProfileGuard mGuard("SCAN_PROFILE::MERGE::CONTINUE", IS_DEBUG_LOG_ENABLED(NKikimrServices::TX_COLUMNSHARD_SCAN_MEMORY)); + std::optional lastResultPosition = DrainMergerLinearScan(Context->ReadSequentiallyBufferSize); + if (lastResultPosition) { + LastPK = lastResultPosition->ExtractSortingPosition(); + } + AFL_VERIFY(!!LastPK == (!!ResultBatch && ResultBatch->num_rows())); + PrepareResultBatch(); + return true; +} + +} diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/merge.h b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/merge.h new file mode 100644 index 000000000000..043ff943e472 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/merge.h @@ -0,0 +1,110 @@ +#pragma once +#include "context.h" +#include +#include + +#include +#include + +namespace NKikimr::NOlap::NReader::NPlain { + +class TMergingContext { +protected: + YDB_READONLY_DEF(NArrow::NMerger::TSortableBatchPosition, Start); + YDB_READONLY_DEF(NArrow::NMerger::TSortableBatchPosition, Finish); + YDB_READONLY(bool, IncludeFinish, true); + YDB_READONLY(bool, IncludeStart, false); + YDB_READONLY(ui32, IntervalIdx, 0); + bool IsExclusiveIntervalFlag = false; +public: + 
TMergingContext(const NArrow::NMerger::TSortableBatchPosition& start, const NArrow::NMerger::TSortableBatchPosition& finish, + const ui32 intervalIdx, const bool includeFinish, const bool includeStart, const bool isExclusiveInterval) + : Start(start) + , Finish(finish) + , IncludeFinish(includeFinish) + , IncludeStart(includeStart) + , IntervalIdx(intervalIdx) + , IsExclusiveIntervalFlag(isExclusiveInterval) + { + + } + + bool IsExclusiveInterval() const { + return IsExclusiveIntervalFlag; + } + + NJson::TJsonValue DebugJson() const { + NJson::TJsonValue result = NJson::JSON_MAP; + result.InsertValue("start", Start.DebugJson()); + result.InsertValue("idx", IntervalIdx); + result.InsertValue("finish", Finish.DebugJson()); + result.InsertValue("include_finish", IncludeFinish); + result.InsertValue("exclusive", IsExclusiveIntervalFlag); + return result; + } + +}; + +class TBaseMergeTask: public IDataTasksProcessor::ITask { +private: + using TBase = IDataTasksProcessor::ITask; +protected: + std::shared_ptr ResultBatch; + std::shared_ptr LastPK; + const NColumnShard::TCounterGuard Guard; + std::shared_ptr Context; + mutable std::unique_ptr Merger; + std::shared_ptr MergingContext; + const ui32 IntervalIdx; + std::optional ShardedBatch; + + [[nodiscard]] std::optional DrainMergerLinearScan(const std::optional resultBufferLimit); + + void PrepareResultBatch(); +private: + virtual bool DoApply(IDataReader& indexedDataRead) const override; +public: + TBaseMergeTask(const std::shared_ptr& mergingContext, const std::shared_ptr& readContext) + : TBase(readContext->GetCommonContext()->GetScanActorId()) + , Guard(readContext->GetCommonContext()->GetCounters().GetMergeTasksGuard()) + , Context(readContext) + , MergingContext(mergingContext) + , IntervalIdx(MergingContext->GetIntervalIdx()) { + + } +}; + +class TStartMergeTask: public TBaseMergeTask { +private: + using TBase = TBaseMergeTask; + bool OnlyEmptySources = true; + THashMap> Sources; +protected: + virtual bool 
DoExecute() override; +public: + virtual TString GetTaskClassIdentifier() const override { + return "CS::MERGE_START"; + } + + TStartMergeTask(const std::shared_ptr& mergingContext, + const std::shared_ptr& readContext, THashMap>&& sources); +}; + +class TContinueMergeTask: public TBaseMergeTask { +private: + using TBase = TBaseMergeTask; +protected: + virtual bool DoExecute() override; +public: + virtual TString GetTaskClassIdentifier() const override { + return "CS::MERGE_CONTINUE"; + } + + TContinueMergeTask(const std::shared_ptr& mergingContext, const std::shared_ptr& readContext, std::unique_ptr&& merger) + : TBase(mergingContext, readContext) { + AFL_VERIFY(merger); + Merger = std::move(merger); + } +}; + +} diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/plain_read_data.cpp b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/plain_read_data.cpp similarity index 86% rename from ydb/core/tx/columnshard/engines/reader/plain_reader/plain_read_data.cpp rename to ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/plain_read_data.cpp index 26b397fe2e36..9def8738cab9 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/plain_read_data.cpp +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/plain_read_data.cpp @@ -1,8 +1,8 @@ #include "plain_read_data.h" -namespace NKikimr::NOlap::NPlainReader { +namespace NKikimr::NOlap::NReader::NPlain { -TPlainReadData::TPlainReadData(const std::shared_ptr& context) +TPlainReadData::TPlainReadData(const std::shared_ptr& context) : TBase(context) , SpecialReadContext(std::make_shared(context)) { @@ -29,9 +29,9 @@ TPlainReadData::TPlainReadData(const std::shared_ptr& conte if (movePortion) { if ((*itPortion)->GetMeta().GetProduced() == NPortion::EProduced::COMPACTED || (*itPortion)->GetMeta().GetProduced() == NPortion::EProduced::SPLIT_COMPACTED) { - compactedPortionsBytes += (*itPortion)->BlobsBytes(); + compactedPortionsBytes += (*itPortion)->GetTotalBlobBytes(); } 
else { - insertedPortionsBytes += (*itPortion)->BlobsBytes(); + insertedPortionsBytes += (*itPortion)->GetTotalBlobBytes(); } sources.emplace_back(std::make_shared(sourceIdx++, *itPortion, SpecialReadContext, (*itPortion)->IndexKeyStart(), (*itPortion)->IndexKeyEnd())); ++itPortion; @@ -54,7 +54,7 @@ TPlainReadData::TPlainReadData(const std::shared_ptr& conte } -std::vector TPlainReadData::DoExtractReadyResults(const int64_t maxRowsInBatch) { +std::vector TPlainReadData::DoExtractReadyResults(const int64_t maxRowsInBatch) { auto result = TPartialReadResult::SplitResults(std::move(PartialResults), maxRowsInBatch); ui32 count = 0; for (auto&& r: result) { @@ -69,12 +69,12 @@ std::vector TPlainReadData::DoExtractReadyRe return result; } -bool TPlainReadData::DoReadNextInterval() { +TConclusion TPlainReadData::DoReadNextInterval() { return Scanner->BuildNextInterval(); } void TPlainReadData::OnIntervalResult(const std::shared_ptr& result) { - result->GetResourcesGuardOnly()->Update(result->GetMemorySize()); +// result->GetResourcesGuardOnly()->Update(result->GetMemorySize()); ReadyResultsCount += result->GetRecordsCount(); PartialResults.emplace_back(std::move(*result)); } diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/plain_read_data.h b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/plain_read_data.h similarity index 65% rename from ydb/core/tx/columnshard/engines/reader/plain_reader/plain_read_data.h rename to ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/plain_read_data.h index 13e1f25f0d16..39d993b156d6 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/plain_read_data.h +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/plain_read_data.h @@ -3,13 +3,13 @@ #include "source.h" #include "scanner.h" -#include -#include -#include +#include +#include +#include -namespace NKikimr::NOlap::NPlainReader { +namespace NKikimr::NOlap::NReader::NPlain { -class TPlainReadData: public IDataReader, 
TNonCopyable { +class TPlainReadData: public IDataReader, TNonCopyable, NColumnShard::TMonitoringObjectsCounter { private: using TBase = IDataReader; std::shared_ptr Scanner; @@ -18,6 +18,10 @@ class TPlainReadData: public IDataReader, TNonCopyable { ui32 ReadyResultsCount = 0; bool AbortedFlag = false; protected: + virtual TConclusionStatus DoStart() override { + return Scanner->Start(); + } + virtual TString DoDebugString(const bool verbose) const override { TStringBuilder sb; sb << SpecialReadContext->DebugString() << ";"; @@ -27,8 +31,8 @@ class TPlainReadData: public IDataReader, TNonCopyable { return sb; } - virtual std::vector DoExtractReadyResults(const int64_t /*maxRowsInBatch*/) override; - virtual bool DoReadNextInterval() override; + virtual std::vector DoExtractReadyResults(const int64_t maxRowsInBatch) override; + virtual TConclusion DoReadNextInterval() override; virtual void DoAbort() override { AbortedFlag = true; @@ -40,6 +44,10 @@ class TPlainReadData: public IDataReader, TNonCopyable { return (Scanner->IsFinished() && PartialResults.empty()); } public: + virtual void OnSentDataFromInterval(const ui32 intervalIdx) const override { + Scanner->OnSentDataFromInterval(intervalIdx); + } + const TReadMetadata::TConstPtr& GetReadMetadata() const { return SpecialReadContext->GetReadMetadata(); } @@ -58,10 +66,10 @@ class TPlainReadData: public IDataReader, TNonCopyable { void OnIntervalResult(const std::shared_ptr& result); - TPlainReadData(const std::shared_ptr& context); + TPlainReadData(const std::shared_ptr& context); ~TPlainReadData() { if (!AbortedFlag) { - Abort(); + Abort("unexpected on destructor"); } } }; diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/scanner.cpp b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/scanner.cpp new file mode 100644 index 000000000000..f0c789f3d205 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/scanner.cpp @@ -0,0 +1,302 @@ +#include 
"scanner.h" +#include "plain_read_data.h" +#include +#include + +namespace NKikimr::NOlap::NReader::NPlain { + +void TScanHead::OnIntervalResult(const std::optional& newBatch, const std::shared_ptr& lastPK, + std::unique_ptr&& merger, const ui32 intervalIdx, TPlainReadData& reader) { + if (Context->GetReadMetadata()->Limit && (!newBatch || newBatch->GetRecordsCount() == 0) && InFlightLimit < 1000) { + if (++ZeroCount == std::max(16, InFlightLimit)) { + InFlightLimit *= 2; + ZeroCount = 0; + } + } else { + ZeroCount = 0; + } + auto itInterval = FetchingIntervals.find(intervalIdx); + AFL_VERIFY(itInterval != FetchingIntervals.end()); + itInterval->second->SetMerger(std::move(merger)); + AFL_VERIFY(Context->GetCommonContext()->GetReadMetadata()->IsSorted()); + if (newBatch && newBatch->GetRecordsCount()) { + const std::optional callbackIdxSubscriver = itInterval->second->HasMerger() ? std::optional(intervalIdx) : std::nullopt; + AFL_VERIFY(ReadyIntervals.emplace(intervalIdx, std::make_shared(itInterval->second->GetResourcesGuard(), *newBatch, lastPK, callbackIdxSubscriver)).second); + } else { + AFL_VERIFY(ReadyIntervals.emplace(intervalIdx, nullptr).second); + } + Y_ABORT_UNLESS(FetchingIntervals.size()); + while (FetchingIntervals.size()) { + const auto interval = FetchingIntervals.begin()->second; + const ui32 intervalIdx = interval->GetIntervalIdx(); + auto it = ReadyIntervals.find(intervalIdx); + if (it == ReadyIntervals.end()) { + break; + } + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "interval_result")("interval_idx", intervalIdx)("count", it->second ? 
it->second->GetRecordsCount() : 0); + auto result = it->second; + ReadyIntervals.erase(it); + if (result) { + reader.OnIntervalResult(result); + } + if (!interval->HasMerger()) { + FetchingIntervals.erase(FetchingIntervals.begin()); + } else if (result) { + break; + } else { + interval->OnPartSendingComplete(); + } + } + if (FetchingIntervals.empty()) { + AFL_VERIFY(ReadyIntervals.empty()); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "intervals_finished"); + } else { + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "wait_interval")("remained", FetchingIntervals.size())("interval_idx", FetchingIntervals.begin()->first); + } +} + +TConclusionStatus TScanHead::Start() { + TScanContext context; + for (auto itPoint = BorderPoints.begin(); itPoint != BorderPoints.end(); ++itPoint) { + auto& point = itPoint->second; + context.OnStartPoint(point); + if (context.GetIsSpecialPoint()) { + auto detectorResult = DetectSourcesFeatureInContextIntervalScan(context.GetCurrentSources(), false); + for (auto&& i : context.GetCurrentSources()) { + i.second->IncIntervalsCount(); + } + if (!detectorResult) { + AFL_ERROR(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "scanner_initializer_aborted")("reason", detectorResult.GetErrorMessage()); + Abort(); + return detectorResult; + } + } + for (auto&& i : point.GetFinishSources()) { + i->InitFetchingPlan(Context->GetColumnsFetchingPlan(i)); + } + context.OnFinishPoint(point); + if (context.GetCurrentSources().size()) { + auto itPointNext = itPoint; + Y_ABORT_UNLESS(++itPointNext != BorderPoints.end()); + context.OnNextPointInfo(itPointNext->second); + for (auto&& i : context.GetCurrentSources()) { + i.second->IncIntervalsCount(); + } + auto detectorResult = DetectSourcesFeatureInContextIntervalScan(context.GetCurrentSources(), context.GetIsExclusiveInterval()); + if (!detectorResult) { + AFL_ERROR(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "scanner_initializer_aborted")("reason", 
detectorResult.GetErrorMessage()); + Abort(); + return detectorResult; + } + } + } + return TConclusionStatus::Success(); +} + +TScanHead::TScanHead(std::deque>&& sources, const std::shared_ptr& context) + : Context(context) +{ + InFlightLimit = Context->GetReadMetadata()->Limit ? 1 : Max(); + while (sources.size()) { + auto source = sources.front(); + BorderPoints[source->GetStart()].AddStart(source); + BorderPoints[source->GetFinish()].AddFinish(source); + sources.pop_front(); + } +} + +class TSourcesStorageForMemoryOptimization { +private: + class TSourceInfo { + private: + YDB_READONLY_DEF(std::shared_ptr, Source); + YDB_READONLY_DEF(std::shared_ptr, FetchingInfo); + public: + TSourceInfo(const std::shared_ptr& source, const std::shared_ptr& fetchingInfo) + : Source(source) + , FetchingInfo(fetchingInfo) + { + + } + + NJson::TJsonValue DebugJson() const { + NJson::TJsonValue result = NJson::JSON_MAP; + result.InsertValue("source", Source->DebugJsonForMemory()); +// result.InsertValue("fetching", Fetching->DebugJsonForMemory()); + return result; + } + }; + + std::map> Sources; + YDB_READONLY(ui64, MemorySum, 0); + YDB_READONLY_DEF(std::set, PathIds); +public: + TString DebugString() const { + NJson::TJsonValue resultJson; + auto& memorySourcesArr = resultJson.InsertValue("sources_by_memory", NJson::JSON_ARRAY); + resultJson.InsertValue("sources_by_memory_count", Sources.size()); + for (auto it = Sources.rbegin(); it != Sources.rend(); ++it) { + auto& sourceMap = memorySourcesArr.AppendValue(NJson::JSON_MAP); + sourceMap.InsertValue("memory", it->first); + auto& sourcesArr = sourceMap.InsertValue("sources", NJson::JSON_ARRAY); + for (auto&& s : it->second) { + sourcesArr.AppendValue(s.second.DebugJson()); + } + } + return resultJson.GetStringRobust(); + } + + void UpdateSource(const ui64 oldMemoryInfo, const ui32 sourceIdx) { + auto it = Sources.find(oldMemoryInfo); + AFL_VERIFY(it != Sources.end()); + auto itSource = it->second.find(sourceIdx); + 
AFL_VERIFY(itSource != it->second.end()); + auto sourceInfo = itSource->second; + it->second.erase(itSource); + if (it->second.empty()) { + Sources.erase(it); + } + AFL_VERIFY(MemorySum >= oldMemoryInfo); + MemorySum -= oldMemoryInfo; + AddSource(sourceInfo.GetSource(), sourceInfo.GetFetchingInfo()); + } + + void AddSource(const std::shared_ptr& source, const std::shared_ptr& fetching) { + const ui64 sourceMemory = fetching->PredictRawBytes(source); + MemorySum += sourceMemory; + AFL_VERIFY(Sources[sourceMemory].emplace(source->GetSourceIdx(), TSourceInfo(source, fetching)).second); + PathIds.emplace(source->GetPathId()); + } + + bool Optimize(const ui64 memoryLimit) { + bool modified = true; + while (MemorySum > memoryLimit && modified) { + modified = false; + for (auto it = Sources.rbegin(); it != Sources.rend(); ++it) { + for (auto&& [sourceIdx, sourceInfo] : it->second) { + if (!sourceInfo.GetFetchingInfo()->InitSourceSeqColumnIds(sourceInfo.GetSource())) { + continue; + } + modified = true; + UpdateSource(it->first, sourceIdx); + break; + } + if (modified) { + break; + } + } + } + return MemorySum < memoryLimit; + } +}; + +TConclusionStatus TScanHead::DetectSourcesFeatureInContextIntervalScan(const THashMap>& intervalSources, const bool isExclusiveInterval) const { + TSourcesStorageForMemoryOptimization optimizer; + for (auto&& i : intervalSources) { + if (!isExclusiveInterval) { + i.second->SetExclusiveIntervalOnly(false); + } + auto fetchingPlan = Context->GetColumnsFetchingPlan(i.second); + optimizer.AddSource(i.second, fetchingPlan); + } + const ui64 startMemory = optimizer.GetMemorySum(); + if (!optimizer.Optimize(Context->ReduceMemoryIntervalLimit) && Context->RejectMemoryIntervalLimit < optimizer.GetMemorySum()) { + AFL_ERROR(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "next_internal_broken") + ("reason", "a lot of memory need")("start", startMemory) + ("reduce_limit", Context->ReduceMemoryIntervalLimit) + ("reject_limit", 
Context->RejectMemoryIntervalLimit) + ("need", optimizer.GetMemorySum()) + ("path_ids", JoinSeq(",", optimizer.GetPathIds())) + ("details", IS_LOG_PRIORITY_ENABLED(NActors::NLog::PRI_DEBUG, NKikimrServices::TX_COLUMNSHARD_SCAN) ? optimizer.DebugString() : "NEED_DEBUG_LEVEL"); + Context->GetCommonContext()->GetCounters().OnOptimizedIntervalMemoryFailed(optimizer.GetMemorySum()); + return TConclusionStatus::Fail("We need a lot of memory in time for interval scanner: " + + ::ToString(optimizer.GetMemorySum()) + " path_ids: " + JoinSeq(",", optimizer.GetPathIds()) + ". We need wait compaction processing. Sorry."); + } else if (optimizer.GetMemorySum() < startMemory) { + AFL_INFO(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "memory_reduce_active") + ("reason", "need reduce memory")("start", startMemory) + ("reduce_limit", Context->ReduceMemoryIntervalLimit) + ("reject_limit", Context->RejectMemoryIntervalLimit) + ("need", optimizer.GetMemorySum()) + ("path_ids", JoinSeq(",", optimizer.GetPathIds())); + Context->GetCommonContext()->GetCounters().OnOptimizedIntervalMemoryReduced(startMemory - optimizer.GetMemorySum()); + } + Context->GetCommonContext()->GetCounters().OnOptimizedIntervalMemoryRequired(optimizer.GetMemorySum()); + return TConclusionStatus::Success(); +} + +TConclusion TScanHead::BuildNextInterval() { + if (AbortFlag) { + return false; + } + while (BorderPoints.size() && (FetchingIntervals.size() < InFlightLimit || BorderPoints.begin()->second.GetStartSources().empty())) { + auto firstBorderPointInfo = std::move(BorderPoints.begin()->second); + CurrentState.OnStartPoint(firstBorderPointInfo); + + if (CurrentState.GetIsSpecialPoint()) { + const ui32 intervalIdx = SegmentIdxCounter++; + auto interval = std::make_shared( + BorderPoints.begin()->first, BorderPoints.begin()->first, intervalIdx, CurrentState.GetCurrentSources(), + Context, true, true, false); + FetchingIntervals.emplace(intervalIdx, interval); + 
IntervalStats.emplace_back(CurrentState.GetCurrentSources().size(), true); + NResourceBroker::NSubscribe::ITask::StartResourceSubscription(Context->GetCommonContext()->GetResourceSubscribeActorId(), interval); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "new_interval")("interval_idx", intervalIdx)("interval", interval->DebugJson()); + } + + CurrentState.OnFinishPoint(firstBorderPointInfo); + + CurrentStart = BorderPoints.begin()->first; + BorderPoints.erase(BorderPoints.begin()); + if (CurrentState.GetCurrentSources().size()) { + Y_ABORT_UNLESS(BorderPoints.size()); + CurrentState.OnNextPointInfo(BorderPoints.begin()->second); + const ui32 intervalIdx = SegmentIdxCounter++; + auto interval = std::make_shared(*CurrentStart, BorderPoints.begin()->first, intervalIdx, CurrentState.GetCurrentSources(), Context, + CurrentState.GetIncludeFinish(), CurrentState.GetIncludeStart(), CurrentState.GetIsExclusiveInterval()); + FetchingIntervals.emplace(intervalIdx, interval); + IntervalStats.emplace_back(CurrentState.GetCurrentSources().size(), false); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "new_interval")("interval_idx", intervalIdx)("interval", interval->DebugJson()); + NResourceBroker::NSubscribe::ITask::StartResourceSubscription(Context->GetCommonContext()->GetResourceSubscribeActorId(), interval); + return true; + } else { + IntervalStats.emplace_back(CurrentState.GetCurrentSources().size(), false); + } + } + return false; +} + +const TReadContext& TScanHead::GetContext() const { + return *Context->GetCommonContext(); +} + +bool TScanHead::IsReverse() const { + return GetContext().GetReadMetadata()->IsDescSorted(); +} + +void TScanHead::Abort() { + AbortFlag = true; + THashSet sourceIds; + for (auto&& i : FetchingIntervals) { + for (auto&& s : i.second->GetSources()) { + sourceIds.emplace(s.first); + } + i.second->Abort(); + } + for (auto&& i : BorderPoints) { + for (auto&& s : i.second.GetStartSources()) { + if 
(sourceIds.emplace(s->GetSourceIdx()).second) { + s->Abort(); + } + } + for (auto&& s : i.second.GetFinishSources()) { + if (sourceIds.emplace(s->GetSourceIdx()).second) { + s->Abort(); + } + } + } + FetchingIntervals.clear(); + BorderPoints.clear(); + Y_ABORT_UNLESS(IsFinished()); +} + +} diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/scanner.h b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/scanner.h new file mode 100644 index 000000000000..75de439aec63 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/scanner.h @@ -0,0 +1,123 @@ +#pragma once +#include "source.h" +#include "interval.h" +#include +#include +#include + +namespace NKikimr::NOlap::NReader::NPlain { + +class TPlainReadData; + +class TDataSourceEndpoint { +private: + YDB_READONLY_DEF(std::vector>, StartSources); + YDB_READONLY_DEF(std::vector>, FinishSources); +public: + void AddStart(std::shared_ptr source) { + StartSources.emplace_back(source); + } + void AddFinish(std::shared_ptr source) { + FinishSources.emplace_back(source); + } +}; + +class TIntervalStat { +private: + YDB_READONLY(ui32, SourcesCount, 0); + YDB_READONLY(bool, IsPoint, false); +public: + TIntervalStat(const ui32 sourcesCount, const bool isPoint) + : SourcesCount(sourcesCount) + , IsPoint(isPoint) + { + + } +}; + +class TScanContext { +private: + using TCurrentSources = THashMap>; + YDB_READONLY(bool, IncludeStart, false); + YDB_READONLY(bool, IncludeFinish, false); + YDB_READONLY_DEF(TCurrentSources, CurrentSources); + YDB_READONLY(bool, IsSpecialPoint, false); + YDB_READONLY(bool, IsExclusiveInterval, false); +public: + void OnStartPoint(const TDataSourceEndpoint& point) { + IsSpecialPoint = point.GetStartSources().size() && point.GetFinishSources().size(); + IncludeStart = point.GetStartSources().size() && !IsSpecialPoint; + for (auto&& i : point.GetStartSources()) { + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("add_source", i->GetSourceIdx()); + 
AFL_VERIFY(CurrentSources.emplace(i->GetSourceIdx(), i).second)("idx", i->GetSourceIdx()); + } + } + + void OnFinishPoint(const TDataSourceEndpoint& point) { + for (auto&& i : point.GetFinishSources()) { + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("remove_source", i->GetSourceIdx()); + AFL_VERIFY(CurrentSources.erase(i->GetSourceIdx()))("idx", i->GetSourceIdx()); + } + } + + void OnNextPointInfo(const TDataSourceEndpoint& nextPoint) { + IncludeFinish = nextPoint.GetStartSources().empty(); + IsExclusiveInterval = (CurrentSources.size() == 1) && IncludeStart && IncludeFinish; + } +}; + +class TScanHead { +private: + std::shared_ptr Context; + bool SourcesInitialized = false; + TScanContext CurrentState; + std::map BorderPoints; + std::optional CurrentStart; + std::map> FetchingIntervals; + THashMap> ReadyIntervals; + ui32 SegmentIdxCounter = 0; + std::vector IntervalStats; + ui64 InFlightLimit = 1; + ui64 ZeroCount = 0; + bool AbortFlag = false; + void DrainSources(); + [[nodiscard]] TConclusionStatus DetectSourcesFeatureInContextIntervalScan(const THashMap>& intervalSources, const bool isExclusiveInterval) const; +public: + void OnSentDataFromInterval(const ui32 intervalIdx) const { + if (AbortFlag) { + return; + } + auto it = FetchingIntervals.find(intervalIdx); + AFL_VERIFY(it != FetchingIntervals.end())("interval_idx", intervalIdx)("count", FetchingIntervals.size()); + it->second->OnPartSendingComplete(); + } + + bool IsReverse() const; + void Abort(); + + bool IsFinished() const { + return BorderPoints.empty() && FetchingIntervals.empty(); + } + + const TReadContext& GetContext() const; + + TString DebugString() const { + TStringBuilder sb; + for (auto&& i : IntervalStats) { + sb << (i.GetIsPoint() ? 
"^" : "") << i.GetSourcesCount() << ";"; + } + return sb; + } + + void OnIntervalResult(const std::optional& batch, const std::shared_ptr& lastPK, + std::unique_ptr&& merger, const ui32 intervalIdx, TPlainReadData& reader); + + TConclusionStatus Start(); + + TScanHead(std::deque>&& sources, const std::shared_ptr& context); + + [[nodiscard]] TConclusion BuildNextInterval(); + +}; + +} diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/source.cpp b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/source.cpp similarity index 59% rename from ydb/core/tx/columnshard/engines/reader/plain_reader/source.cpp rename to ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/source.cpp index 3ba21e93aef1..723ba175f571 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/source.cpp +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/source.cpp @@ -9,26 +9,16 @@ #include #include -namespace NKikimr::NOlap::NPlainReader { +namespace NKikimr::NOlap::NReader::NPlain { -void IDataSource::InitFetchingPlan(const std::shared_ptr& fetchingFirstStep, const std::shared_ptr& sourcePtr, const bool isExclusive) { - AFL_VERIFY(fetchingFirstStep); - if (AtomicCas(&FilterStageFlag, 1, 0)) { - StageData = std::make_unique(isExclusive); - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("InitFetchingPlan", fetchingFirstStep->DebugString())("source_idx", SourceIdx); - NActors::TLogContextGuard logGuard(NActors::TLogContextBuilder::Build()("source", SourceIdx)("method", "InitFetchingPlan")); - if (IsAborted()) { - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "InitFetchingPlanAborted"); - return; - } - if (fetchingFirstStep->ExecuteInplace(sourcePtr, fetchingFirstStep)) { - auto task = std::make_shared(sourcePtr, fetchingFirstStep->GetNextStep(), Context->GetCommonContext()->GetScanActorId()); - NConveyor::TScanServiceOperator::SendTaskToExecute(task); - } - } +void IDataSource::InitFetchingPlan(const std::shared_ptr& fetching) { 
+ AFL_VERIFY(fetching); + AFL_VERIFY(!FetchingPlan); + FetchingPlan = fetching; } void IDataSource::RegisterInterval(TFetchingInterval& interval) { + AFL_VERIFY(FetchingPlan); if (!IsReadyFlag) { AFL_VERIFY(Intervals.emplace(interval.GetIntervalIdx(), &interval).second); } @@ -44,8 +34,24 @@ void IDataSource::SetIsReady() { Intervals.clear(); } +void IDataSource::OnInitResourcesGuard(const std::shared_ptr& sourcePtr) { + AFL_VERIFY(FetchingPlan); + if (AtomicCas(&FilterStageFlag, 1, 0)) { + StageData = std::make_unique(GetExclusiveIntervalOnly() && IsSourceInMemory()); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("InitFetchingPlan", FetchingPlan->DebugString())("source_idx", SourceIdx); + NActors::TLogContextGuard logGuard(NActors::TLogContextBuilder::Build()("source", SourceIdx)("method", "InitFetchingPlan")); + if (IsAborted()) { + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "InitFetchingPlanAborted"); + return; + } + TFetchingScriptCursor cursor(FetchingPlan, 0); + auto task = std::make_shared(sourcePtr, std::move(cursor), Context->GetCommonContext()->GetScanActorId()); + NConveyor::TScanServiceOperator::SendTaskToExecute(task); + } +} + void TPortionDataSource::NeedFetchColumns(const std::set& columnIds, - const std::shared_ptr& readingAction, THashMap& nullBlocks, + TBlobsAction& blobsAction, THashMap& nullBlocks, const std::shared_ptr& filter) { const NArrow::TColumnFilter& cFilter = filter ? 
*filter : NArrow::TColumnFilter::BuildAllowFilter(); ui32 fetchedChunks = 0; @@ -59,52 +65,52 @@ void TPortionDataSource::NeedFetchColumns(const std::set& columnIds, bool itFinished = false; for (auto&& c : columnChunks) { Y_ABORT_UNLESS(!itFinished); - if (!itFilter.IsBatchForSkip(c->GetMeta().GetNumRowsVerified())) { - readingAction->AddRange(c->BlobRange); + if (!itFilter.IsBatchForSkip(c->GetMeta().GetNumRows())) { + auto reading = blobsAction.GetReading(Schema->GetIndexInfo().GetColumnStorageId(c->GetColumnId(), Portion->GetMeta().GetTierName())); + reading->SetIsBackgroundProcess(false); + reading->AddRange(Portion->RestoreBlobRange(c->BlobRange)); ++fetchedChunks; } else { - nullBlocks.emplace(c->BlobRange, c->GetMeta().GetNumRowsVerified()); + nullBlocks.emplace(c->GetAddress(), c->GetMeta().GetNumRows()); ++nullChunks; } - itFinished = !itFilter.Next(c->GetMeta().GetNumRowsVerified()); + itFinished = !itFilter.Next(c->GetMeta().GetNumRows()); } AFL_VERIFY(itFinished)("filter", itFilter.DebugString())("count", Portion->NumRows(i)); } - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "chunks_stats")("fetch", fetchedChunks)("null", nullChunks)("reading_action", readingAction->GetStorageId())("columns", columnIds.size()); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "chunks_stats")("fetch", fetchedChunks)("null", nullChunks)("reading_actions", blobsAction.GetStorageIds())("columns", columnIds.size()); } -bool TPortionDataSource::DoStartFetchingColumns(const std::shared_ptr& sourcePtr, const std::shared_ptr& step, const std::shared_ptr& columns) { - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", step->GetName()); +bool TPortionDataSource::DoStartFetchingColumns(const std::shared_ptr& sourcePtr, const TFetchingScriptCursor& step, const std::shared_ptr& columns) { + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", step.GetName()); AFL_VERIFY(columns->GetColumnsCount()); AFL_VERIFY(!StageData->GetAppliedFilter() || 
!StageData->GetAppliedFilter()->IsTotalDenyFilter()); auto& columnIds = columns->GetColumnIds(); - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", step->GetName())("fetching_info", step->DebugString()); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", step.GetName())("fetching_info", step.DebugString()); - auto readAction = Portion->GetBlobsStorage()->StartReadingAction("CS::READ::" + step->GetName()); - readAction->SetIsBackgroundProcess(false); + TBlobsAction action(GetContext()->GetCommonContext()->GetStoragesManager(), NBlobOperations::EConsumer::SCAN); { - THashMap nullBlocks; - NeedFetchColumns(columnIds, readAction, nullBlocks, StageData->GetAppliedFilter()); + THashMap nullBlocks; + NeedFetchColumns(columnIds, action, nullBlocks, StageData->GetAppliedFilter()); StageData->AddNulls(std::move(nullBlocks)); } - if (!readAction->GetExpectedBlobsSize()) { + auto readActions = action.GetReadingActions(); + if (!readActions.size()) { return false; } - std::vector> actions = {readAction}; - auto constructor = std::make_shared(actions, sourcePtr, step, GetContext(), "CS::READ::" + step->GetName(), ""); + auto constructor = std::make_shared(readActions, sourcePtr, step, GetContext(), "CS::READ::" + step.GetName(), ""); NActors::TActivationContext::AsActorContext().Register(new NOlap::NBlobOperations::NRead::TActor(constructor)); return true; } -bool TPortionDataSource::DoStartFetchingIndexes(const std::shared_ptr& sourcePtr, const std::shared_ptr& step, const std::shared_ptr& indexes) { - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", step->GetName()); +bool TPortionDataSource::DoStartFetchingIndexes(const std::shared_ptr& sourcePtr, const TFetchingScriptCursor& step, const std::shared_ptr& indexes) { + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", step.GetName()); Y_ABORT_UNLESS(indexes->GetIndexesCount()); - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", step->GetName())("fetching_info", step->DebugString()); + 
AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", step.GetName())("fetching_info", step.DebugString()); - auto readAction = Portion->GetBlobsStorage()->StartReadingAction("CS::READ::" + step->GetName()); - readAction->SetIsBackgroundProcess(false); + TBlobsAction action(GetContext()->GetCommonContext()->GetStoragesManager(), NBlobOperations::EConsumer::SCAN); { std::set indexIds; for (auto&& i : Portion->GetIndexes()) { @@ -112,20 +118,21 @@ bool TPortionDataSource::DoStartFetchingIndexes(const std::shared_ptrAddRange(i.GetBlobRange()); + auto readAction = action.GetReading(Schema->GetIndexInfo().GetIndexStorageId(i.GetIndexId())); + readAction->SetIsBackgroundProcess(false); + readAction->AddRange(Portion->RestoreBlobRange(i.GetBlobRange())); } if (indexes->GetIndexIdsSet().size() != indexIds.size()) { return false; } } - - if (!readAction->GetExpectedBlobsSize()) { + auto readingActions = action.GetReadingActions(); + if (!readingActions.size()) { NYDBTest::TControllers::GetColumnShardController()->OnIndexSelectProcessed({}); return false; } - std::vector> actions = {readAction}; - auto constructor = std::make_shared(actions, sourcePtr, step, GetContext(), "CS::READ::" + step->GetName(), ""); + auto constructor = std::make_shared(readingActions, sourcePtr, step, GetContext(), "CS::READ::" + step.GetName(), ""); NActors::TActivationContext::AsActorContext().Register(new NOlap::NBlobOperations::NRead::TActor(constructor)); return true; } @@ -144,7 +151,7 @@ void TPortionDataSource::DoApplyIndex(const NIndexes::TIndexCheckerContainer& in if (!indexIds.contains(i->GetIndexId())) { continue; } - indexBlobs[i->GetIndexId()].emplace_back(StageData->ExtractBlob(i->GetBlobRange())); + indexBlobs[i->GetIndexId()].emplace_back(StageData->ExtractBlob(i->GetAddress())); } for (auto&& i : indexIds) { if (!indexBlobs.contains(i)) { @@ -169,26 +176,57 @@ void TPortionDataSource::DoApplyIndex(const NIndexes::TIndexCheckerContainer& in } } -bool 
TCommittedDataSource::DoStartFetchingColumns(const std::shared_ptr& sourcePtr, const std::shared_ptr& step, const std::shared_ptr& /*columns*/) { +void TPortionDataSource::DoAssembleColumns(const std::shared_ptr& columns) { + auto blobSchema = GetContext()->GetReadMetadata()->GetLoadSchemaVerified(*Portion); + if (SequentialEntityIds.empty()) { + MutableStageData().AddBatch(Portion->PrepareForAssemble(*blobSchema, columns->GetFilteredSchemaVerified(), MutableStageData().MutableBlobs()).AssembleTable()); + } else { + { + auto inMemColumns = columns->GetColumnIds(); + for (auto&& i : SequentialEntityIds) { + inMemColumns.erase(i); + } + if (inMemColumns.size()) { + auto filteredSchema = std::make_shared(columns->GetFilteredSchemaPtrVerified(), inMemColumns); + MutableStageData().AddBatch(Portion->PrepareForAssemble(*blobSchema, *filteredSchema, MutableStageData().MutableBlobs()).AssembleTable()); + } + } + { + std::set scanColumns; + for (auto&& i : columns->GetColumnIds()) { + if (SequentialEntityIds.contains(i)) { + scanColumns.emplace(i); + } + } + if (scanColumns.size()) { + auto filteredSchema = std::make_shared(columns->GetFilteredSchemaPtrVerified(), scanColumns); + MutableStageData().AddBatch(Portion->PrepareForAssemble(*blobSchema, *filteredSchema, MutableStageData().MutableBlobs()).AssembleForSeqAccess()); + } + } + } +} + +bool TCommittedDataSource::DoStartFetchingColumns(const std::shared_ptr& sourcePtr, const TFetchingScriptCursor& step, const std::shared_ptr& /*columns*/) { if (ReadStarted) { return false; } ReadStarted = true; - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", step->GetName())("fetching_info", step->DebugString()); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", step.GetName())("fetching_info", step.DebugString()); std::shared_ptr storageOperator = GetContext()->GetCommonContext()->GetStoragesManager()->GetInsertOperator(); - auto readAction = storageOperator->StartReadingAction("CS::READ::" + step->GetName()); + 
auto readAction = storageOperator->StartReadingAction(NBlobOperations::EConsumer::SCAN); readAction->SetIsBackgroundProcess(false); readAction->AddRange(CommittedBlob.GetBlobRange()); std::vector> actions = {readAction}; - auto constructor = std::make_shared(actions, sourcePtr, step, GetContext(), "CS::READ::" + step->GetName(), ""); + auto constructor = std::make_shared(actions, sourcePtr, step, GetContext(), "CS::READ::" + step.GetName(), ""); NActors::TActivationContext::AsActorContext().Register(new NOlap::NBlobOperations::NRead::TActor(constructor)); return true; } void TCommittedDataSource::DoAssembleColumns(const std::shared_ptr& columns) { + TMemoryProfileGuard mGuard("SCAN_PROFILE::ASSEMBLER::COMMITTED", IS_DEBUG_LOG_ENABLED(NKikimrServices::TX_COLUMNSHARD_SCAN_MEMORY)); if (!GetStageData().GetTable()) { Y_ABORT_UNLESS(GetStageData().GetBlobs().size() == 1); auto bData = MutableStageData().ExtractBlob(GetStageData().GetBlobs().begin()->first); diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/source.h b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/source.h similarity index 51% rename from ydb/core/tx/columnshard/engines/reader/plain_reader/source.h rename to ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/source.h index 3cce8e16eef7..8e95c20d79e2 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/source.h +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/source.h @@ -2,19 +2,22 @@ #include "context.h" #include "columns_set.h" #include "fetched_data.h" -#include -#include #include +#include +#include #include #include -#include +#include +#include #include +#include +#include namespace NKikimr::NOlap { class IDataReader; } -namespace NKikimr::NOlap::NPlainReader { +namespace NKikimr::NOlap::NReader::NPlain { class TFetchingInterval; class TPlainReadData; @@ -23,19 +26,22 @@ class IFetchingStep; class IDataSource { private: + YDB_ACCESSOR(bool, ExclusiveIntervalOnly, true); 
YDB_READONLY(ui32, SourceIdx, 0); - YDB_READONLY_DEF(NIndexedReader::TSortableBatchPosition, Start); - YDB_READONLY_DEF(NIndexedReader::TSortableBatchPosition, Finish); + YDB_READONLY_DEF(NArrow::NMerger::TSortableBatchPosition, Start); + YDB_READONLY_DEF(NArrow::NMerger::TSortableBatchPosition, Finish); NArrow::TReplaceKey StartReplaceKey; NArrow::TReplaceKey FinishReplaceKey; YDB_READONLY_DEF(std::shared_ptr, Context); YDB_READONLY(TSnapshot, RecordSnapshotMax, TSnapshot::Zero()); - std::optional RecordsCount; + YDB_READONLY(ui32, RecordsCount, 0); YDB_READONLY(ui32, IntervalsCount, 0); virtual NJson::TJsonValue DoDebugJson() const = 0; bool MergingStartedFlag = false; bool AbortedFlag = false; + std::shared_ptr FetchingPlan; protected: + bool IsSourceInMemoryFlag = true; THashMap Intervals; std::unique_ptr StageData; @@ -44,16 +50,37 @@ class IDataSource { TAtomic FilterStageFlag = 0; bool IsReadyFlag = false; - bool IsAborted() const { - return AbortedFlag; - } - - virtual bool DoStartFetchingColumns(const std::shared_ptr& sourcePtr, const std::shared_ptr& step, const std::shared_ptr& columns) = 0; - virtual bool DoStartFetchingIndexes(const std::shared_ptr& sourcePtr, const std::shared_ptr& step, const std::shared_ptr& indexes) = 0; + virtual bool DoStartFetchingColumns(const std::shared_ptr& sourcePtr, const TFetchingScriptCursor& step, const std::shared_ptr& columns) = 0; + virtual bool DoStartFetchingIndexes(const std::shared_ptr& sourcePtr, const TFetchingScriptCursor& step, const std::shared_ptr& indexes) = 0; virtual void DoAssembleColumns(const std::shared_ptr& columns) = 0; virtual void DoAbort() = 0; virtual void DoApplyIndex(const NIndexes::TIndexCheckerContainer& indexMeta) = 0; + virtual bool DoAddSequentialEntityIds(const ui32 entityId) = 0; + virtual NJson::TJsonValue DoDebugJsonForMemory() const { + return NJson::JSON_MAP; + } public: + void OnInitResourcesGuard(const std::shared_ptr& sourcePtr); + + bool IsAborted() const { + return 
AbortedFlag; + } + bool IsSourceInMemory() const { + return IsSourceInMemoryFlag; + } + virtual bool IsSourceInMemory(const std::set& fieldIds) const = 0; + bool AddSequentialEntityIds(const ui32 entityId) { + if (DoAddSequentialEntityIds(entityId)) { + IsSourceInMemoryFlag = false; + return true; + } + return false; + } + virtual THashMap DecodeBlobAddresses(NBlobOperations::NRead::TCompositeReadBlobs&& blobsOriginal) const = 0; + + virtual ui64 GetPathId() const = 0; + virtual bool HasIndexes(const std::set& indexIds) const = 0; + const NArrow::TReplaceKey& GetStartReplaceKey() const { return StartReplaceKey; } @@ -69,13 +96,10 @@ class IDataSource { void SetIsReady(); void Finalize() { + TMemoryProfileGuard mpg("SCAN_PROFILE::STAGE_RESULT", IS_DEBUG_LOG_ENABLED(NKikimrServices::TX_COLUMNSHARD_SCAN_MEMORY)); StageResult = std::make_unique(std::move(StageData)); } - bool IsEmptyData() const { - return GetStageData().IsEmpty(); - } - void ApplyIndex(const NIndexes::TIndexCheckerContainer& indexMeta) { return DoApplyIndex(indexMeta); } @@ -87,16 +111,16 @@ class IDataSource { DoAssembleColumns(columns); } - bool StartFetchingColumns(const std::shared_ptr& sourcePtr, const std::shared_ptr& step, const std::shared_ptr& columns) { + bool StartFetchingColumns(const std::shared_ptr& sourcePtr, const TFetchingScriptCursor& step, const std::shared_ptr& columns) { AFL_VERIFY(columns); return DoStartFetchingColumns(sourcePtr, step, columns); } - bool StartFetchingIndexes(const std::shared_ptr& sourcePtr, const std::shared_ptr& step, const std::shared_ptr& indexes) { + bool StartFetchingIndexes(const std::shared_ptr& sourcePtr, const TFetchingScriptCursor& step, const std::shared_ptr& indexes) { AFL_VERIFY(indexes); return DoStartFetchingIndexes(sourcePtr, step, indexes); } - void InitFetchingPlan(const std::shared_ptr& fetchingFirstStep, const std::shared_ptr& sourcePtr, const bool isExclusive); + void InitFetchingPlan(const std::shared_ptr& fetching); std::shared_ptr 
GetLastPK() const { return Finish.ExtractSortingPosition(); @@ -105,8 +129,8 @@ class IDataSource { ++IntervalsCount; } - virtual ui64 GetRawBytes(const std::set& columnIds) const = 0; - virtual ui64 GetIndexBytes(const std::set& indexIds) const = 0; + virtual ui64 GetColumnRawBytes(const std::set& columnIds) const = 0; + virtual ui64 GetIndexRawBytes(const std::set& indexIds) const = 0; bool IsMergingStarted() const { return MergingStartedFlag; @@ -123,6 +147,13 @@ class IDataSource { DoAbort(); } + NJson::TJsonValue DebugJsonForMemory() const { + NJson::TJsonValue result = NJson::JSON_MAP; + result.InsertValue("details", DoDebugJsonForMemory()); + result.InsertValue("count", RecordsCount); + return result; + } + NJson::TJsonValue DebugJson() const { NJson::TJsonValue result = NJson::JSON_MAP; result.InsertValue("source_idx", SourceIdx); @@ -148,17 +179,11 @@ class IDataSource { return *StageData; } - ui32 GetRecordsCount() const { - AFL_VERIFY(RecordsCount); - return *RecordsCount; - } - void RegisterInterval(TFetchingInterval& interval); IDataSource(const ui32 sourceIdx, const std::shared_ptr& context, const NArrow::TReplaceKey& start, const NArrow::TReplaceKey& finish, - const TSnapshot& recordSnapshotMax, const std::optional recordsCount - ) + const TSnapshot& recordSnapshotMax, const ui32 recordsCount) : SourceIdx(sourceIdx) , Start(context->GetReadMetadata()->BuildSortedPosition(start)) , Finish(context->GetReadMetadata()->BuildSortedPosition(finish)) @@ -183,19 +208,18 @@ class IDataSource { class TPortionDataSource: public IDataSource { private: using TBase = IDataSource; + std::set SequentialEntityIds; std::shared_ptr Portion; + std::shared_ptr Schema; void NeedFetchColumns(const std::set& columnIds, - const std::shared_ptr& readingAction, THashMap& nullBlocks, + TBlobsAction& blobsAction, THashMap& nullBlocks, const std::shared_ptr& filter); virtual void DoApplyIndex(const NIndexes::TIndexCheckerContainer& indexChecker) override; - virtual bool 
DoStartFetchingColumns(const std::shared_ptr& sourcePtr, const std::shared_ptr& step, const std::shared_ptr& columns) override; - virtual bool DoStartFetchingIndexes(const std::shared_ptr& sourcePtr, const std::shared_ptr& step, const std::shared_ptr& indexes) override; - virtual void DoAssembleColumns(const std::shared_ptr& columns) override { - auto blobSchema = GetContext()->GetReadMetadata()->GetLoadSchema(Portion->GetMinSnapshot()); - MutableStageData().AddBatch(Portion->PrepareForAssemble(*blobSchema, columns->GetFilteredSchemaVerified(), MutableStageData().MutableBlobs()).AssembleTable()); - } + virtual bool DoStartFetchingColumns(const std::shared_ptr& sourcePtr, const TFetchingScriptCursor& step, const std::shared_ptr& columns) override; + virtual bool DoStartFetchingIndexes(const std::shared_ptr& sourcePtr, const TFetchingScriptCursor& step, const std::shared_ptr& indexes) override; + virtual void DoAssembleColumns(const std::shared_ptr& columns) override; virtual NJson::TJsonValue DoDebugJson() const override { NJson::TJsonValue result = NJson::JSON_MAP; result.InsertValue("type", "portion"); @@ -203,14 +227,72 @@ class TPortionDataSource: public IDataSource { return result; } + virtual NJson::TJsonValue DoDebugJsonForMemory() const override { + NJson::TJsonValue result = TBase::DoDebugJsonForMemory(); + auto columns = Portion->GetColumnIds(); + for (auto&& i : SequentialEntityIds) { + AFL_VERIFY(columns.erase(i)); + } +// result.InsertValue("sequential_columns", JoinSeq(",", SequentialEntityIds)); + if (SequentialEntityIds.size()) { + result.InsertValue("min_memory_seq", Portion->GetMinMemoryForReadColumns(SequentialEntityIds)); + result.InsertValue("min_memory_seq_blobs", Portion->GetColumnBlobBytes(SequentialEntityIds)); + result.InsertValue("in_mem", Portion->GetColumnRawBytes(columns, false)); + } + result.InsertValue("columns_in_mem", JoinSeq(",", columns)); + result.InsertValue("portion_id", Portion->GetPortionId()); + result.InsertValue("raw", 
Portion->GetTotalRawBytes()); + result.InsertValue("blob", Portion->GetTotalBlobBytes()); + result.InsertValue("read_memory", GetColumnRawBytes(Portion->GetColumnIds())); + return result; + } virtual void DoAbort() override; + virtual ui64 GetPathId() const override { + return Portion->GetPathId(); + } + virtual bool DoAddSequentialEntityIds(const ui32 entityId) override { + return SequentialEntityIds.emplace(entityId).second; + } + public: - virtual ui64 GetRawBytes(const std::set& columnIds) const override { - return Portion->GetRawBytes(columnIds); + virtual bool HasIndexes(const std::set& indexIds) const override { + return Portion->HasIndexes(indexIds); + } + + virtual THashMap DecodeBlobAddresses(NBlobOperations::NRead::TCompositeReadBlobs&& blobsOriginal) const override { + return Portion->DecodeBlobAddresses(std::move(blobsOriginal), Schema->GetIndexInfo()); + } + + virtual bool IsSourceInMemory(const std::set& fieldIds) const override { + for (auto&& i : SequentialEntityIds) { + if (fieldIds.contains(i)) { + return false; + } + } + return true; + } + + virtual ui64 GetColumnRawBytes(const std::set& columnsIds) const override { + if (SequentialEntityIds.size()) { + std::set selectedSeq; + std::set selectedInMem; + for (auto&& i : columnsIds) { + if (SequentialEntityIds.contains(i)) { + selectedSeq.emplace(i); + } else { + selectedInMem.emplace(i); + } + } + return Portion->GetMinMemoryForReadColumns(selectedSeq) + + Portion->GetColumnBlobBytes(selectedSeq) + + Portion->GetColumnRawBytes(selectedInMem, false); + } else { + return Portion->GetColumnRawBytes(columnsIds, false); + } } - virtual ui64 GetIndexBytes(const std::set& columnIds) const override { - return Portion->GetIndexBytes(columnIds); + virtual ui64 GetIndexRawBytes(const std::set& indexIds) const override { + return Portion->GetIndexRawBytes(indexIds, false); } const TPortionInfo& GetPortionInfo() const { @@ -225,6 +307,7 @@ class TPortionDataSource: public IDataSource { const 
NArrow::TReplaceKey& start, const NArrow::TReplaceKey& finish) : TBase(sourceIdx, context, start, finish, portion->RecordSnapshotMax(), portion->GetRecordsCount()) , Portion(portion) + , Schema(GetContext()->GetReadMetadata()->GetLoadSchemaVerified(*Portion)) { } }; @@ -239,8 +322,8 @@ class TCommittedDataSource: public IDataSource { } - virtual bool DoStartFetchingColumns(const std::shared_ptr& sourcePtr, const std::shared_ptr& step, const std::shared_ptr& columns) override; - virtual bool DoStartFetchingIndexes(const std::shared_ptr& /*sourcePtr*/, const std::shared_ptr& /*step*/, const std::shared_ptr& /*indexes*/) override { + virtual bool DoStartFetchingColumns(const std::shared_ptr& sourcePtr, const TFetchingScriptCursor& step, const std::shared_ptr& columns) override; + virtual bool DoStartFetchingIndexes(const std::shared_ptr& /*sourcePtr*/, const TFetchingScriptCursor& /*step*/, const std::shared_ptr& /*indexes*/) override { return false; } virtual void DoApplyIndex(const NIndexes::TIndexCheckerContainer& /*indexMeta*/) override { @@ -254,12 +337,37 @@ class TCommittedDataSource: public IDataSource { result.InsertValue("info", CommittedBlob.DebugString()); return result; } + virtual ui64 GetPathId() const override { + return 0; + } + virtual bool DoAddSequentialEntityIds(const ui32 /*entityId*/) override { + return false; + } public: - virtual ui64 GetRawBytes(const std::set& /*columnIds*/) const override { + virtual THashMap DecodeBlobAddresses(NBlobOperations::NRead::TCompositeReadBlobs&& blobsOriginal) const override { + THashMap result; + for (auto&& i : blobsOriginal) { + for (auto&& b : i.second) { + result.emplace(TChunkAddress(1, 1), std::move(b.second)); + } + } + return result; + } + + virtual bool IsSourceInMemory(const std::set& /*fieldIds*/) const override { + return true; + } + + virtual bool HasIndexes(const std::set& /*indexIds*/) const override { + return false; + } + + virtual ui64 GetColumnRawBytes(const std::set& /*columnIds*/) const 
override { return CommittedBlob.GetBlobRange().Size; } - virtual ui64 GetIndexBytes(const std::set& /*columnIds*/) const override { + virtual ui64 GetIndexRawBytes(const std::set& /*columnIds*/) const override { + AFL_VERIFY(false); return 0; } @@ -269,7 +377,7 @@ class TCommittedDataSource: public IDataSource { TCommittedDataSource(const ui32 sourceIdx, const TCommittedBlob& committed, const std::shared_ptr& context, const NArrow::TReplaceKey& start, const NArrow::TReplaceKey& finish) - : TBase(sourceIdx, context, start, finish, committed.GetSnapshot(), {}) + : TBase(sourceIdx, context, start, finish, committed.GetSnapshot(), committed.GetRecordsCount()) , CommittedBlob(committed) { } diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/ya.make b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/ya.make new file mode 100644 index 000000000000..cfa691a22e84 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/iterator/ya.make @@ -0,0 +1,23 @@ +LIBRARY() + +SRCS( + scanner.cpp + constructor.cpp + source.cpp + interval.cpp + fetched_data.cpp + plain_read_data.cpp + merge.cpp + columns_set.cpp + context.cpp + fetching.cpp + iterator.cpp +) + +PEERDIR( + ydb/core/formats/arrow + ydb/core/tx/columnshard/blobs_action + ydb/core/tx/conveyor/usage +) + +END() diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/scanner.cpp b/ydb/core/tx/columnshard/engines/reader/plain_reader/scanner.cpp deleted file mode 100644 index 7611a4488ff2..000000000000 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/scanner.cpp +++ /dev/null @@ -1,143 +0,0 @@ -#include "scanner.h" -#include "plain_read_data.h" -#include -#include - -namespace NKikimr::NOlap::NPlainReader { - -void TScanHead::OnIntervalResult(const std::optional& newBatch, const std::shared_ptr& lastPK, const ui32 intervalIdx, TPlainReadData& reader) { - if (Context->GetReadMetadata()->Limit && (!newBatch || newBatch->GetRecordsCount() == 0) && InFlightLimit < 
1000) { - if (++ZeroCount == std::max(16, InFlightLimit)) { - InFlightLimit *= 2; - ZeroCount = 0; - } - } else { - ZeroCount = 0; - } - auto itInterval = FetchingIntervals.find(intervalIdx); - AFL_VERIFY(itInterval != FetchingIntervals.end()); - if (!Context->GetCommonContext()->GetReadMetadata()->IsSorted()) { - if (newBatch && newBatch->GetRecordsCount()) { - reader.OnIntervalResult(std::make_shared(itInterval->second->GetResourcesGuard(), *newBatch, lastPK)); - } - AFL_VERIFY(FetchingIntervals.erase(intervalIdx)); - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "interval_result")("interval_idx", intervalIdx)("count", newBatch ? newBatch->GetRecordsCount() : 0); - } else { - if (newBatch && newBatch->GetRecordsCount()) { - AFL_VERIFY(ReadyIntervals.emplace(intervalIdx, std::make_shared(itInterval->second->GetResourcesGuard(), *newBatch, lastPK)).second); - } else { - AFL_VERIFY(ReadyIntervals.emplace(intervalIdx, nullptr).second); - } - Y_ABORT_UNLESS(FetchingIntervals.size()); - while (FetchingIntervals.size()) { - const auto interval = FetchingIntervals.begin()->second; - const ui32 intervalIdx = interval->GetIntervalIdx(); - auto it = ReadyIntervals.find(intervalIdx); - if (it == ReadyIntervals.end()) { - break; - } - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "interval_result")("interval_idx", intervalIdx)("count", it->second ? 
it->second->GetRecordsCount() : 0); - FetchingIntervals.erase(FetchingIntervals.begin()); - if (it->second) { - reader.OnIntervalResult(it->second); - } - ReadyIntervals.erase(it); - } - if (FetchingIntervals.empty()) { - AFL_VERIFY(ReadyIntervals.empty()); - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "intervals_finished"); - } else { - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "wait_interval")("remained", FetchingIntervals.size())("interval_idx", FetchingIntervals.begin()->first); - } - } -} - -TScanHead::TScanHead(std::deque>&& sources, const std::shared_ptr& context) - : Context(context) -{ - InFlightLimit = Context->GetReadMetadata()->Limit ? 1 : Max(); - while (sources.size()) { - auto source = sources.front(); - BorderPoints[source->GetStart()].AddStart(source); - BorderPoints[source->GetFinish()].AddFinish(source); - sources.pop_front(); - } - - THashMap> currentSources; - for (auto&& i : BorderPoints) { - for (auto&& s : i.second.GetStartSources()) { - AFL_VERIFY(currentSources.emplace(s->GetSourceIdx(), s).second); - } - for (auto&& [_, source] : currentSources) { - source->IncIntervalsCount(); - } - for (auto&& s : i.second.GetFinishSources()) { - AFL_VERIFY(currentSources.erase(s->GetSourceIdx())); - } - } -} - -bool TScanHead::BuildNextInterval() { - while (BorderPoints.size() && (FetchingIntervals.size() < InFlightLimit || BorderPoints.begin()->second.GetStartSources().empty())) { - auto firstBorderPointInfo = std::move(BorderPoints.begin()->second); - bool includeStart = firstBorderPointInfo.GetStartSources().size(); - for (auto&& i : firstBorderPointInfo.GetStartSources()) { - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("add_source", i->GetSourceIdx()); - AFL_VERIFY(CurrentSegments.emplace(i->GetSourceIdx(), i).second)("idx", i->GetSourceIdx()); - } - - if (firstBorderPointInfo.GetStartSources().size() && firstBorderPointInfo.GetFinishSources().size()) { - includeStart = false; - const ui32 intervalIdx = 
SegmentIdxCounter++; - auto interval = std::make_shared( - BorderPoints.begin()->first, BorderPoints.begin()->first, intervalIdx, CurrentSegments, - Context, true, true, false); - FetchingIntervals.emplace(intervalIdx, interval); - IntervalStats.emplace_back(CurrentSegments.size(), true); - NResourceBroker::NSubscribe::ITask::StartResourceSubscription(Context->GetCommonContext()->GetResourceSubscribeActorId(), interval); - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "new_interval")("interval_idx", intervalIdx)("interval", interval->DebugJson()); - } - - for (auto&& i : firstBorderPointInfo.GetFinishSources()) { - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("remove_source", i->GetSourceIdx()); - AFL_VERIFY(CurrentSegments.erase(i->GetSourceIdx()))("idx", i->GetSourceIdx()); - } - - CurrentStart = BorderPoints.begin()->first; - BorderPoints.erase(BorderPoints.begin()); - if (CurrentSegments.size()) { - Y_ABORT_UNLESS(BorderPoints.size()); - const bool includeFinish = BorderPoints.begin()->second.GetStartSources().empty(); - const ui32 intervalIdx = SegmentIdxCounter++; - const bool isExclusiveInterval = (CurrentSegments.size() == 1) && includeStart && includeFinish; - auto interval = std::make_shared(*CurrentStart, BorderPoints.begin()->first, intervalIdx, CurrentSegments, Context, includeFinish, includeStart, isExclusiveInterval); - FetchingIntervals.emplace(intervalIdx, interval); - IntervalStats.emplace_back(CurrentSegments.size(), false); - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "new_interval")("interval_idx", intervalIdx)("interval", interval->DebugJson()); - NResourceBroker::NSubscribe::ITask::StartResourceSubscription(Context->GetCommonContext()->GetResourceSubscribeActorId(), interval); - return true; - } else { - IntervalStats.emplace_back(CurrentSegments.size(), false); - } - } - return false; -} - -const NKikimr::NOlap::TReadContext& TScanHead::GetContext() const { - return *Context->GetCommonContext(); -} - -bool 
TScanHead::IsReverse() const { - return GetContext().GetReadMetadata()->IsDescSorted(); -} - -void TScanHead::Abort() { - for (auto&& i : FetchingIntervals) { - i.second->Abort(); - } - FetchingIntervals.clear(); - BorderPoints.clear(); - Y_ABORT_UNLESS(IsFinished()); -} - -} diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/scanner.h b/ydb/core/tx/columnshard/engines/reader/plain_reader/scanner.h deleted file mode 100644 index 8e071e8f54a5..000000000000 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/scanner.h +++ /dev/null @@ -1,76 +0,0 @@ -#pragma once -#include "source.h" -#include "interval.h" -#include - -namespace NKikimr::NOlap::NPlainReader { - -class TPlainReadData; - -class TDataSourceEndpoint { -private: - YDB_READONLY_DEF(std::vector>, StartSources); - YDB_READONLY_DEF(std::vector>, FinishSources); -public: - void AddStart(std::shared_ptr source) { - StartSources.emplace_back(source); - } - void AddFinish(std::shared_ptr source) { - FinishSources.emplace_back(source); - } -}; - -class TIntervalStat { -private: - YDB_READONLY(ui32, SourcesCount, 0); - YDB_READONLY(bool, IsPoint, false); -public: - TIntervalStat(const ui32 sourcesCount, const bool isPoint) - : SourcesCount(sourcesCount) - , IsPoint(isPoint) - { - - } -}; - -class TScanHead { -private: - std::shared_ptr Context; - std::map BorderPoints; - std::map> CurrentSegments; - std::optional CurrentStart; - std::map> FetchingIntervals; - THashMap> ReadyIntervals; - ui32 SegmentIdxCounter = 0; - std::vector IntervalStats; - void DrainSources(); - ui64 InFlightLimit = 1; - ui64 ZeroCount = 0; -public: - - bool IsReverse() const; - void Abort(); - - bool IsFinished() const { - return BorderPoints.empty() && FetchingIntervals.empty(); - } - - const TReadContext& GetContext() const; - - TString DebugString() const { - TStringBuilder sb; - for (auto&& i : IntervalStats) { - sb << (i.GetIsPoint() ? 
"^" : "") << i.GetSourcesCount() << ";"; - } - return sb; - } - - void OnIntervalResult(const std::optional& batch, const std::shared_ptr& lastPK, const ui32 intervalIdx, TPlainReadData& reader); - - TScanHead(std::deque>&& sources, const std::shared_ptr& context); - - bool BuildNextInterval(); - -}; - -} diff --git a/ydb/core/tx/columnshard/engines/reader/plain_reader/ya.make b/ydb/core/tx/columnshard/engines/reader/plain_reader/ya.make index 4a4db941aa67..c0e311e32598 100644 --- a/ydb/core/tx/columnshard/engines/reader/plain_reader/ya.make +++ b/ydb/core/tx/columnshard/engines/reader/plain_reader/ya.make @@ -1,21 +1,11 @@ LIBRARY() SRCS( - scanner.cpp - constructor.cpp - source.cpp - interval.cpp - fetched_data.cpp - plain_read_data.cpp - columns_set.cpp - context.cpp - fetching.cpp ) PEERDIR( - ydb/core/formats/arrow - ydb/core/tx/columnshard/blobs_action - ydb/core/tx/conveyor/usage + ydb/core/tx/columnshard/engines/reader/plain_reader/constructor + ydb/core/tx/columnshard/engines/reader/plain_reader/iterator ) END() diff --git a/ydb/core/tx/columnshard/engines/reader/read_context.cpp b/ydb/core/tx/columnshard/engines/reader/read_context.cpp deleted file mode 100644 index ba07afecc800..000000000000 --- a/ydb/core/tx/columnshard/engines/reader/read_context.cpp +++ /dev/null @@ -1,16 +0,0 @@ -#include "read_context.h" -#include "read_metadata.h" -#include - -namespace NKikimr::NOlap { - -void TActorBasedMemoryAccesor::DoOnBufferReady() { - OwnerId.Send(OwnerId, new NActors::TEvents::TEvWakeup(1)); -} - - -IDataReader::IDataReader(const std::shared_ptr& context) - : Context(context) { -} - -} diff --git a/ydb/core/tx/columnshard/engines/reader/read_filter_merger.h b/ydb/core/tx/columnshard/engines/reader/read_filter_merger.h deleted file mode 100644 index 54dadf24d101..000000000000 --- a/ydb/core/tx/columnshard/engines/reader/read_filter_merger.h +++ /dev/null @@ -1,423 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include -#include -#include 
-#include -#include -#include - -namespace NKikimr::NOlap::NIndexedReader { - -class TRecordBatchBuilder; - -template -class TSortingHeap { -public: - TSortingHeap() = default; - - template - TSortingHeap(TCursors& cursors, bool notNull) { - Queue.reserve(cursors.size()); - for (auto& cur : cursors) { - if (!cur.Empty()) { - Queue.emplace_back(TSortCursor(&cur, notNull)); - } - } - std::make_heap(Queue.begin(), Queue.end()); - } - - const TSortCursor& Current() const { return Queue.front(); } - TSortCursor& MutableCurrent() { return Queue.front(); } - size_t Size() const { return Queue.size(); } - bool Empty() const { return Queue.empty(); } - TSortCursor& NextChild() { return Queue[NextChildIndex()]; } - - void Next() { - Y_ABORT_UNLESS(Size()); - - if (Queue.front().Next()) { - UpdateTop(); - } else { - RemoveTop(); - } - } - - void RemoveTop() { - std::pop_heap(Queue.begin(), Queue.end()); - Queue.pop_back(); - NextIdx = 0; - } - - void Push(TSortCursor&& cursor) { - Queue.emplace_back(cursor); - std::push_heap(Queue.begin(), Queue.end()); - NextIdx = 0; - } - - NJson::TJsonValue DebugJson() const { - NJson::TJsonValue result = NJson::JSON_ARRAY; - for (auto&& i : Queue) { - result.AppendValue(i.DebugJson()); - } - return result; - } - - /// This is adapted version of the function __sift_down from libc++. - /// Why cannot simply use std::priority_queue? - /// - because it doesn't support updating the top element and requires pop and push instead. - /// Also look at "Boost.Heap" library. - void UpdateTop() { - size_t size = Queue.size(); - if (size < 2) - return; - - auto begin = Queue.begin(); - - size_t child_idx = NextChildIndex(); - auto child_it = begin + child_idx; - - /// Check if we are in order. - if (*child_it < *begin) - return; - - NextIdx = 0; - - auto curr_it = begin; - auto top(std::move(*begin)); - do { - /// We are not in heap-order, swap the parent with it's largest child. 
- *curr_it = std::move(*child_it); - curr_it = child_it; - - // recompute the child based off of the updated parent - child_idx = 2 * child_idx + 1; - - if (child_idx >= size) - break; - - child_it = begin + child_idx; - - if ((child_idx + 1) < size && *child_it < *(child_it + 1)) { - /// Right child exists and is greater than left child. - ++child_it; - ++child_idx; - } - - /// Check if we are in order. - } while (!(*child_it < top)); - *curr_it = std::move(top); - } -private: - std::vector Queue; - /// Cache comparison between first and second child if the order in queue has not been changed. - size_t NextIdx = 0; - - size_t NextChildIndex() { - if (NextIdx == 0) { - NextIdx = 1; - if (Queue.size() > 2 && Queue[1] < Queue[2]) { - ++NextIdx; - } - } - - return NextIdx; - } - -}; - -class TMergePartialStream { -private: -#ifndef NDEBUG - std::optional CurrentKeyColumns; -#endif - bool PossibleSameVersionFlag = true; - - class TBatchIterator { - private: - bool ControlPointFlag; - TSortableBatchPosition KeyColumns; - TSortableBatchPosition VersionColumns; - i64 RecordsCount; - int ReverseSortKff; - - std::shared_ptr Filter; - std::shared_ptr FilterIterator; - - i32 GetFirstPosition() const { - if (ReverseSortKff > 0) { - return 0; - } else { - return RecordsCount - 1; - } - } - - public: - NJson::TJsonValue DebugJson() const; - - const std::shared_ptr& GetFilter() const { - return Filter; - } - - bool IsControlPoint() const { - return ControlPointFlag; - } - - const TSortableBatchPosition& GetKeyColumns() const { - return KeyColumns; - } - - const TSortableBatchPosition& GetVersionColumns() const { - return VersionColumns; - } - - TBatchIterator(const TSortableBatchPosition& keyColumns) - : ControlPointFlag(true) - , KeyColumns(keyColumns) - { - - } - - TBatchIterator(std::shared_ptr batch, std::shared_ptr filter, - const std::vector& keyColumns, const std::vector& dataColumns, const bool reverseSort) - : ControlPointFlag(false) - , KeyColumns(batch, 0, keyColumns, 
dataColumns, reverseSort) - , VersionColumns(batch, 0, TIndexInfo::GetSpecialColumnNames(), {}, false) - , RecordsCount(batch->num_rows()) - , ReverseSortKff(reverseSort ? -1 : 1) - , Filter(filter) - { - Y_ABORT_UNLESS(KeyColumns.InitPosition(GetFirstPosition())); - Y_ABORT_UNLESS(VersionColumns.InitPosition(GetFirstPosition())); - if (Filter) { - FilterIterator = std::make_shared(Filter->GetIterator(reverseSort, RecordsCount)); - } - } - - bool CheckNextBatch(const TBatchIterator& nextIterator) { - return KeyColumns.Compare(nextIterator.KeyColumns) == std::partial_ordering::less; - } - - bool IsReverse() const { - return ReverseSortKff < 0; - } - - bool IsDeleted() const { - if (!FilterIterator) { - return false; - } - return !FilterIterator->GetCurrentAcceptance(); - } - - TSortableBatchPosition::TFoundPosition SkipToLower(const TSortableBatchPosition& pos) { - const ui32 posStart = KeyColumns.GetPosition(); - auto result = KeyColumns.SkipToLower(pos); - const i32 delta = IsReverse() ? 
(posStart - KeyColumns.GetPosition()) : (KeyColumns.GetPosition() - posStart); - AFL_VERIFY(delta >= 0); - AFL_VERIFY(VersionColumns.InitPosition(KeyColumns.GetPosition())); - if (FilterIterator && delta) { - AFL_VERIFY(FilterIterator->Next(delta)); - } - return result; - } - - bool Next() { - const bool result = KeyColumns.NextPosition(ReverseSortKff) && VersionColumns.NextPosition(ReverseSortKff); - if (FilterIterator) { - Y_ABORT_UNLESS(result == FilterIterator->Next(1)); - } - return result; - } - - bool operator<(const TBatchIterator& item) const { - const std::partial_ordering result = KeyColumns.Compare(item.KeyColumns); - if (result == std::partial_ordering::equivalent) { - if (IsControlPoint() && item.IsControlPoint()) { - return false; - } else if (IsControlPoint()) { - return false; - } else if (item.IsControlPoint()) { - return true; - } - //don't need inverse through we need maximal version at first (reverse analytic not included in VersionColumns) - return VersionColumns.Compare(item.VersionColumns) == std::partial_ordering::less; - } else { - //inverse logic through we use max heap, but need minimal element if not reverse (reverse analytic included in KeyColumns) - return result == std::partial_ordering::greater; - } - } - }; - - class TIteratorData { - private: - YDB_READONLY_DEF(std::shared_ptr, Batch); - YDB_READONLY_DEF(std::shared_ptr, Filter); - public: - TIteratorData(std::shared_ptr batch, std::shared_ptr filter) - : Batch(batch) - , Filter(filter) - { - - } - }; - - NJson::TJsonValue DebugJson() const { - NJson::TJsonValue result = NJson::JSON_MAP; -#ifndef NDEBUG - if (CurrentKeyColumns) { - result["current"] = CurrentKeyColumns->DebugJson(); - } -#endif - result.InsertValue("heap", SortHeap.DebugJson()); - return result; - } - - TSortingHeap SortHeap; - std::shared_ptr SortSchema; - std::shared_ptr DataSchema; - const bool Reverse; - ui32 ControlPoints = 0; - - std::optional DrainCurrentPosition(); - - void AddNewToHeap(std::shared_ptr 
batch, std::shared_ptr filter); - void CheckSequenceInDebug(const TSortableBatchPosition& nextKeyColumnsPosition); -public: - TMergePartialStream(std::shared_ptr sortSchema, std::shared_ptr dataSchema, const bool reverse) - : SortSchema(sortSchema) - , DataSchema(dataSchema) - , Reverse(reverse) { - Y_ABORT_UNLESS(SortSchema); - Y_ABORT_UNLESS(SortSchema->num_fields()); - Y_ABORT_UNLESS(!DataSchema || DataSchema->num_fields()); - } - - void SkipToLowerBound(const TSortableBatchPosition& pos, const bool include) { - if (SortHeap.Empty()) { - return; - } - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("pos", pos.DebugJson().GetStringRobust())("heap", SortHeap.Current().GetKeyColumns().DebugJson().GetStringRobust()); - while (!SortHeap.Empty()) { - const auto cmpResult = SortHeap.Current().GetKeyColumns().Compare(pos); - if (cmpResult == std::partial_ordering::greater) { - break; - } - if (cmpResult == std::partial_ordering::equivalent && include) { - break; - } - const TSortableBatchPosition::TFoundPosition skipPos = SortHeap.MutableCurrent().SkipToLower(pos); - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("pos", pos.DebugJson().GetStringRobust())("heap", SortHeap.Current().GetKeyColumns().DebugJson().GetStringRobust()); - if (skipPos.IsEqual()) { - if (!include && !SortHeap.MutableCurrent().Next()) { - SortHeap.RemoveTop(); - } else { - SortHeap.UpdateTop(); - } - } else if (skipPos.IsLess()) { - SortHeap.RemoveTop(); - } else { - SortHeap.UpdateTop(); - } - } - } - - void SetPossibleSameVersion(const bool value) { - PossibleSameVersionFlag = value; - } - - bool IsValid() const { - return SortHeap.Size(); - } - - ui32 GetSourcesCount() const { - return SortHeap.Size(); - } - - TString DebugString() const { - return TStringBuilder() << "sort_heap=" << SortHeap.DebugJson(); - } - - void PutControlPoint(std::shared_ptr point); - - void RemoveControlPoint(); - - bool ControlPointEnriched() const { - return SortHeap.Size() && SortHeap.Current().IsControlPoint(); - } - - void 
AddSource(std::shared_ptr batch, std::shared_ptr filter); - - bool IsEmpty() const { - return !SortHeap.Size(); - } - - bool DrainAll(TRecordBatchBuilder& builder); - std::shared_ptr SingleSourceDrain(const TSortableBatchPosition& readTo, const bool includeFinish, std::optional* lastResultPosition = nullptr); - bool DrainCurrentTo(TRecordBatchBuilder& builder, const TSortableBatchPosition& readTo, const bool includeFinish, std::optional* lastResultPosition = nullptr); - std::vector> DrainAllParts(const std::map& positions, - const std::vector>& resultFields); -}; - -class TRecordBatchBuilder { -private: - std::vector> Builders; - YDB_READONLY_DEF(std::vector>, Fields); - YDB_READONLY(ui32, RecordsCount, 0); - - bool IsSameFieldsSequence(const std::vector>& f1, const std::vector>& f2) { - if (f1.size() != f2.size()) { - return false; - } - for (ui32 i = 0; i < f1.size(); ++i) { - if (!f1[i]->Equals(f2[i])) { - return false; - } - } - return true; - } - -public: - ui32 GetBuildersCount() const { - return Builders.size(); - } - - TString GetColumnNames() const { - TStringBuilder result; - for (auto&& f : Fields) { - result << f->name() << ","; - } - return result; - } - - TRecordBatchBuilder(const std::vector>& fields, const std::optional rowsCountExpectation = {}, const THashMap& fieldDataSizePreallocated = {}) - : Fields(fields) { - Y_ABORT_UNLESS(Fields.size()); - for (auto&& f : fields) { - Builders.emplace_back(NArrow::MakeBuilder(f)); - auto it = fieldDataSizePreallocated.find(f->name()); - if (it != fieldDataSizePreallocated.end()) { - NArrow::ReserveData(*Builders.back(), it->second); - } - if (rowsCountExpectation) { - NArrow::TStatusValidator::Validate(Builders.back()->Reserve(*rowsCountExpectation)); - } - } - } - - std::shared_ptr Finalize() { - auto schema = std::make_shared(Fields); - std::vector> columns; - for (auto&& i : Builders) { - columns.emplace_back(NArrow::TStatusValidator::GetValid(i->Finish())); - } - return arrow::RecordBatch::Make(schema, 
columns.front()->length(), columns); - } - - void AddRecord(const TSortableBatchPosition& position); - void ValidateDataSchema(const std::shared_ptr& schema); -}; - -} diff --git a/ydb/core/tx/columnshard/engines/reader/read_metadata.cpp b/ydb/core/tx/columnshard/engines/reader/read_metadata.cpp deleted file mode 100644 index e9a9e804977c..000000000000 --- a/ydb/core/tx/columnshard/engines/reader/read_metadata.cpp +++ /dev/null @@ -1,112 +0,0 @@ -#include "read_metadata.h" -#include "read_context.h" -#include "plain_reader/plain_read_data.h" -#include -#include -#include -#include - -namespace NKikimr::NOlap { - -TDataStorageAccessor::TDataStorageAccessor(const std::unique_ptr& insertTable, - const std::unique_ptr& index) - : InsertTable(insertTable) - , Index(index) -{} - -std::shared_ptr TDataStorageAccessor::Select(const NOlap::TReadDescription& readDescription) const { - if (readDescription.ReadNothing) { - return std::make_shared(); - } - return Index->Select(readDescription.PathId, - readDescription.GetSnapshot(), - readDescription.PKRangesFilter); -} - -std::vector TDataStorageAccessor::GetCommitedBlobs(const NOlap::TReadDescription& readDescription, const std::shared_ptr& pkSchema) const { - return std::move(InsertTable->Read(readDescription.PathId, readDescription.GetSnapshot(), pkSchema)); -} - -std::unique_ptr TReadMetadata::StartScan(const std::shared_ptr& readContext) const { - return std::make_unique(readContext, this->shared_from_this()); -} - -bool TReadMetadata::Init(const TReadDescription& readDescription, const TDataStorageAccessor& dataAccessor, std::string& /*error*/) { - SetPKRangesFilter(readDescription.PKRangesFilter); - - /// @note We could have column name changes between schema versions: - /// Add '1:foo', Drop '1:foo', Add '2:foo'. Drop should hide '1:foo' from reads. - /// It's expected that we have only one version on 'foo' in blob and could split them by schema {planStep:txId}. 
- /// So '1:foo' would be omitted in blob records for the column in new snapshots. And '2:foo' - in old ones. - /// It's not possible for blobs with several columns. There should be a special logic for them. - CommittedBlobs = dataAccessor.GetCommitedBlobs(readDescription, ResultIndexSchema->GetIndexInfo().GetReplaceKey()); - - SelectInfo = dataAccessor.Select(readDescription); - StatsMode = readDescription.StatsMode; - return true; -} - -std::set TReadMetadata::GetEarlyFilterColumnIds() const { - auto& indexInfo = ResultIndexSchema->GetIndexInfo(); - std::set result; - for (auto&& i : GetProgram().GetEarlyFilterColumns()) { - auto id = indexInfo.GetColumnIdOptional(i); - if (id) { - result.emplace(*id); - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("early_filter_column", i); - } - } - return result; -} - -std::set TReadMetadata::GetPKColumnIds() const { - std::set result; - auto& indexInfo = ResultIndexSchema->GetIndexInfo(); - for (auto&& i : indexInfo.GetPrimaryKeyColumns()) { - Y_ABORT_UNLESS(result.emplace(indexInfo.GetColumnId(i.first)).second); - } - return result; -} - -std::vector> TReadStatsMetadata::GetKeyYqlSchema() const { - return NOlap::GetColumns(NColumnShard::PrimaryIndexStatsSchema, NColumnShard::PrimaryIndexStatsSchema.KeyColumns); -} - -std::unique_ptr TReadStatsMetadata::StartScan(const std::shared_ptr& /*readContext*/) const { - return std::make_unique(this->shared_from_this()); -} - -void TReadStats::PrintToLog() { - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN) - ("event", "statistic") - ("begin", BeginTimestamp) - ("index_granules", IndexGranules) - ("index_portions", IndexPortions) - ("index_batches", IndexBatches) - ("committed_batches", CommittedBatches) - ("schema_columns", SchemaColumns) - ("filter_columns", FilterColumns) - ("additional_columns", AdditionalColumns) - ("compacted_portions_bytes", CompactedPortionsBytes) - ("inserted_portions_bytes", InsertedPortionsBytes) - ("committed_portions_bytes", CommittedPortionsBytes) - 
("data_filter_bytes", DataFilterBytes) - ("data_additional_bytes", DataAdditionalBytes) - ("delta_bytes", CompactedPortionsBytes + InsertedPortionsBytes + CommittedPortionsBytes - DataFilterBytes - DataAdditionalBytes) - ("selected_rows", SelectedRows) - ; -} - -std::shared_ptr TReadMetadata::BuildReader(const std::shared_ptr& context) const { - return std::make_shared(context); -// auto result = std::make_shared(self, context); -// result->InitRead(); -// return result; -} - -NIndexedReader::TSortableBatchPosition TReadMetadata::BuildSortedPosition(const NArrow::TReplaceKey& key) const { - return NIndexedReader::TSortableBatchPosition(key.ToBatch(GetReplaceKey()), 0, - GetReplaceKey()->field_names(), {}, IsDescSorted()); -} - -} diff --git a/ydb/core/tx/columnshard/engines/reader/read_metadata.h b/ydb/core/tx/columnshard/engines/reader/read_metadata.h deleted file mode 100644 index 13358c0586df..000000000000 --- a/ydb/core/tx/columnshard/engines/reader/read_metadata.h +++ /dev/null @@ -1,303 +0,0 @@ -#pragma once -#include "conveyor_task.h" -#include "description.h" -#include "read_filter_merger.h" -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include - -namespace NKikimr::NColumnShard { -class TScanIteratorBase; -} - -namespace NKikimr::NOlap { - -class TReadContext; - -struct TReadStats { - TInstant BeginTimestamp; - ui64 IndexGranules{0}; - ui64 IndexPortions{0}; - ui64 IndexBatches{0}; - ui64 CommittedBatches{0}; - ui64 CommittedPortionsBytes = 0; - ui64 InsertedPortionsBytes = 0; - ui64 CompactedPortionsBytes = 0; - ui64 DataFilterBytes{ 0 }; - ui64 DataAdditionalBytes{ 0 }; - - ui32 SchemaColumns = 0; - ui32 FilterColumns = 0; - ui32 AdditionalColumns = 0; - - ui32 SelectedRows = 0; - - TReadStats() - : BeginTimestamp(TInstant::Now()) - {} - - void PrintToLog(); - - ui64 GetReadBytes() const { - return CompactedPortionsBytes + InsertedPortionsBytes + CompactedPortionsBytes; - } - - TDuration 
Duration() { - return TInstant::Now() - BeginTimestamp; - } -}; - -class TDataStorageAccessor { -private: - const std::unique_ptr& InsertTable; - const std::unique_ptr& Index; - -public: - TDataStorageAccessor(const std::unique_ptr& insertTable, - const std::unique_ptr& index); - std::shared_ptr Select(const NOlap::TReadDescription& readDescription) const; - std::vector GetCommitedBlobs(const NOlap::TReadDescription& readDescription, const std::shared_ptr& pkSchema) const; -}; - -// Holds all metadata that is needed to perform read/scan -struct TReadMetadataBase { -public: - enum class ESorting { - NONE = 0 /* "not_sorted" */, - ASC /* "ascending" */, - DESC /* "descending" */, - }; -private: - const ESorting Sorting = ESorting::ASC; // Sorting inside returned batches - std::optional PKRangesFilter; - TProgramContainer Program; -public: - using TConstPtr = std::shared_ptr; - - void SetPKRangesFilter(const TPKRangesFilter& value) { - Y_ABORT_UNLESS(IsSorted() && value.IsReverse() == IsDescSorted()); - Y_ABORT_UNLESS(!PKRangesFilter); - PKRangesFilter = value; - } - - const TPKRangesFilter& GetPKRangesFilter() const { - Y_ABORT_UNLESS(!!PKRangesFilter); - return *PKRangesFilter; - } - - TReadMetadataBase(const ESorting sorting, const TProgramContainer& ssaProgram) - : Sorting(sorting) - , Program(ssaProgram) - { - } - virtual ~TReadMetadataBase() = default; - - ui64 Limit = 0; - - virtual void Dump(IOutputStream& out) const { - out << " predicate{" << (PKRangesFilter ? PKRangesFilter->DebugString() : "no_initialized") << "}" - << " " << Sorting << " sorted"; - } - - bool IsAscSorted() const { return Sorting == ESorting::ASC; } - bool IsDescSorted() const { return Sorting == ESorting::DESC; } - bool IsSorted() const { return IsAscSorted() || IsDescSorted(); } - - virtual std::vector> GetKeyYqlSchema() const = 0; - virtual std::unique_ptr StartScan(const std::shared_ptr& readContext) const = 0; - - // TODO: can this only be done for base class? 
- friend IOutputStream& operator << (IOutputStream& out, const TReadMetadataBase& meta) { - meta.Dump(out); - return out; - } - - const TProgramContainer& GetProgram() const { - return Program; - } -}; - -// Holds all metadata that is needed to perform read/scan -struct TReadMetadata : public TReadMetadataBase, public std::enable_shared_from_this { - using TBase = TReadMetadataBase; -private: - TVersionedIndex IndexVersions; - TSnapshot Snapshot; - std::shared_ptr ResultIndexSchema; -public: - using TConstPtr = std::shared_ptr; - - NIndexedReader::TSortableBatchPosition BuildSortedPosition(const NArrow::TReplaceKey& key) const; - std::shared_ptr BuildReader(const std::shared_ptr& context) const; - - bool HasProcessingColumnIds() const { - return GetProgram().HasProcessingColumnIds(); - } - - std::set GetProcessingColumnIds() const { - std::set result; - for (auto&& i : GetProgram().GetProcessingColumns()) { - result.emplace(ResultIndexSchema->GetIndexInfo().GetColumnId(i)); - } - return result; - } - std::shared_ptr SelectInfo; - NYql::NDqProto::EDqStatsMode StatsMode = NYql::NDqProto::EDqStatsMode::DQ_STATS_MODE_NONE; - std::vector CommittedBlobs; - std::shared_ptr ReadStats; - - const TSnapshot& GetSnapshot() const { - return Snapshot; - } - - TReadMetadata(const TVersionedIndex& info, const TSnapshot& snapshot, const ESorting sorting, const TProgramContainer& ssaProgram) - : TBase(sorting, ssaProgram) - , IndexVersions(info) - , Snapshot(snapshot) - , ResultIndexSchema(info.GetSchema(Snapshot)) - , ReadStats(std::make_shared()) - { - } - - bool Init(const TReadDescription& readDescription, const TDataStorageAccessor& dataAccessor, std::string& error); - - ISnapshotSchema::TPtr GetSnapshotSchema(const TSnapshot& version) const { - if (version >= Snapshot){ - return ResultIndexSchema; - } - return IndexVersions.GetSchema(version); - } - - ISnapshotSchema::TPtr GetLoadSchema(const std::optional& version = {}) const { - if (!version) { - return ResultIndexSchema; - 
} - return IndexVersions.GetSchema(*version); - } - - std::shared_ptr GetBlobSchema(const ui64 version) const { - return IndexVersions.GetSchema(version)->GetIndexInfo().ArrowSchema(); - } - - const TIndexInfo& GetIndexInfo(const std::optional& version = {}) const { - if (version && version < Snapshot) { - return IndexVersions.GetSchema(*version)->GetIndexInfo(); - } - return ResultIndexSchema->GetIndexInfo(); - } - - std::vector GetColumnsOrder() const { - auto loadSchema = GetLoadSchema(Snapshot); - std::vector result; - for (auto&& i : loadSchema->GetSchema()->fields()) { - result.emplace_back(i->name()); - } - return result; - } - - std::set GetEarlyFilterColumnIds() const; - std::set GetPKColumnIds() const; - - bool Empty() const { - Y_ABORT_UNLESS(SelectInfo); - return SelectInfo->PortionsOrderedPK.empty() && CommittedBlobs.empty(); - } - - std::shared_ptr GetReplaceKey() const { - return ResultIndexSchema->GetIndexInfo().GetReplaceKey(); - } - - std::vector GetKeyYqlSchema() const override { - return ResultIndexSchema->GetIndexInfo().GetPrimaryKeyColumns(); - } - - size_t NumIndexedChunks() const { - Y_ABORT_UNLESS(SelectInfo); - return SelectInfo->NumChunks(); - } - - size_t NumIndexedBlobs() const { - Y_ABORT_UNLESS(SelectInfo); - return SelectInfo->Stats().Blobs; - } - - std::unique_ptr StartScan(const std::shared_ptr& readContext) const override; - - void Dump(IOutputStream& out) const override { - out << " index chunks: " << NumIndexedChunks() - << " index blobs: " << NumIndexedBlobs() - << " committed blobs: " << CommittedBlobs.size() - // << " with program steps: " << (Program ? 
Program->Steps.size() : 0) - << " at snapshot: " << Snapshot.GetPlanStep() << ":" << Snapshot.GetTxId(); - TBase::Dump(out); - if (SelectInfo) { - out << ", " << *SelectInfo; - } - } - - friend IOutputStream& operator << (IOutputStream& out, const TReadMetadata& meta) { - meta.Dump(out); - return out; - } -}; - -struct TReadStatsMetadata : public TReadMetadataBase, public std::enable_shared_from_this { -private: - using TBase = TReadMetadataBase; - TSnapshot RequestSnapshot; - std::shared_ptr ResultIndexSchema; -public: - using TConstPtr = std::shared_ptr; - - const ui64 TabletId; - std::vector ReadColumnIds; - std::vector ResultColumnIds; - std::deque> IndexPortions; - - const TSnapshot& GetRequestSnapshot() const { return RequestSnapshot; } - - std::optional GetColumnNameDef(const ui32 columnId) const { - if (!ResultIndexSchema) { - return {}; - } - auto f = ResultIndexSchema->GetFieldByColumnIdOptional(columnId); - if (!f) { - return {}; - } - return f->name(); - } - - std::optional GetEntityName(const ui32 entityId) const { - if (!ResultIndexSchema) { - return {}; - } - auto result = ResultIndexSchema->GetIndexInfo().GetColumnNameOptional(entityId); - if (!!result) { - return result; - } - return ResultIndexSchema->GetIndexInfo().GetIndexNameOptional(entityId); - } - - explicit TReadStatsMetadata(ui64 tabletId, const ESorting sorting, const TProgramContainer& ssaProgram, const std::shared_ptr& schema, const TSnapshot& requestSnapshot) - : TBase(sorting, ssaProgram) - , RequestSnapshot(requestSnapshot) - , ResultIndexSchema(schema) - , TabletId(tabletId) - { - } - - std::vector> GetKeyYqlSchema() const override; - - std::unique_ptr StartScan(const std::shared_ptr& readContext) const override; -}; - -} diff --git a/ydb/core/tx/columnshard/engines/reader/sys_view/abstract/granule_view.cpp b/ydb/core/tx/columnshard/engines/reader/sys_view/abstract/granule_view.cpp new file mode 100644 index 000000000000..db99dc03e698 --- /dev/null +++ 
b/ydb/core/tx/columnshard/engines/reader/sys_view/abstract/granule_view.cpp @@ -0,0 +1,5 @@ +#include "granule_view.h" + +namespace NKikimr::NOlap::NReader::NSysView::NAbstract { + +} diff --git a/ydb/core/tx/columnshard/engines/reader/sys_view/abstract/granule_view.h b/ydb/core/tx/columnshard/engines/reader/sys_view/abstract/granule_view.h new file mode 100644 index 000000000000..84d0bb698e0a --- /dev/null +++ b/ydb/core/tx/columnshard/engines/reader/sys_view/abstract/granule_view.h @@ -0,0 +1,43 @@ +#pragma once +#include + +namespace NKikimr::NOlap::NReader::NSysView::NAbstract { + +class TGranuleMetaView { +private: + using TPortions = std::deque>; + YDB_READONLY(ui64, PathId, 0); + YDB_READONLY_DEF(TPortions, Portions); +public: + TGranuleMetaView(const TGranuleMeta& granule, const bool reverse) + : PathId(granule.GetPathId()) + { + for (auto&& i : granule.GetPortions()) { + Portions.emplace_back(i.second); + } + + const auto predSort = [](const std::shared_ptr& l, const std::shared_ptr& r) { + return l->GetPortionId() < r->GetPortionId(); + }; + + std::sort(Portions.begin(), Portions.end(), predSort); + if (reverse) { + std::reverse(Portions.begin(), Portions.end()); + } + } + + bool operator<(const TGranuleMetaView& item) const { + return PathId < item.PathId; + } + + std::shared_ptr PopFrontPortion() { + if (Portions.empty()) { + return nullptr; + } + auto result = Portions.front(); + Portions.pop_front(); + return result; + } +}; + +} diff --git a/ydb/core/tx/columnshard/engines/reader/sys_view/abstract/iterator.cpp b/ydb/core/tx/columnshard/engines/reader/sys_view/abstract/iterator.cpp new file mode 100644 index 000000000000..c47dd37eacb6 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/reader/sys_view/abstract/iterator.cpp @@ -0,0 +1,5 @@ +#include "iterator.h" + +namespace NKikimr::NOlap::NReader::NSysView::NAbstract { + +} diff --git a/ydb/core/tx/columnshard/engines/reader/sys_view/abstract/iterator.h 
b/ydb/core/tx/columnshard/engines/reader/sys_view/abstract/iterator.h new file mode 100644 index 000000000000..aea0425815bf --- /dev/null +++ b/ydb/core/tx/columnshard/engines/reader/sys_view/abstract/iterator.h @@ -0,0 +1,155 @@ +#pragma once +#include "granule_view.h" +#include "metadata.h" + +#include +#include + +namespace NKikimr::NOlap::NReader::NSysView::NAbstract { + +class TStatsIteratorBase: public TScanIteratorBase { +private: + const NTable::TScheme::TTableSchema StatsSchema; + std::shared_ptr DataSchema; +protected: + virtual bool AppendStats(const std::vector>& builders, TGranuleMetaView& granule) const = 0; + virtual ui32 PredictRecordsCount(const TGranuleMetaView& granule) const = 0; + TReadStatsMetadata::TConstPtr ReadMetadata; + const bool Reverse = false; + std::shared_ptr KeySchema; + std::shared_ptr ResultSchema; + + std::deque IndexGranules; +public: + virtual TConclusionStatus Start() override { + return TConclusionStatus::Success(); + } + + virtual bool Finished() const override { + return IndexGranules.empty(); + } + + virtual TConclusion> GetBatch() override { + while (!Finished()) { + auto batchOpt = ExtractStatsBatch(); + if (!batchOpt) { + AFL_VERIFY(Finished()); + return std::nullopt; + } + auto originalBatch = *batchOpt; + if (originalBatch->num_rows() == 0) { + continue; + } + auto keyBatch = NArrow::ExtractColumns(originalBatch, KeySchema); + auto lastKey = keyBatch->Slice(keyBatch->num_rows() - 1, 1); + + { + NArrow::TColumnFilter filter = ReadMetadata->GetPKRangesFilter().BuildFilter(originalBatch); + filter.Apply(originalBatch); + } + + // Leave only requested columns + auto resultBatch = NArrow::ExtractColumns(originalBatch, ResultSchema); + NArrow::TStatusValidator::Validate(ReadMetadata->GetProgram().ApplyProgram(resultBatch)); + if (resultBatch->num_rows() == 0) { + continue; + } + auto table = NArrow::TStatusValidator::GetValid(arrow::Table::FromRecordBatches({resultBatch})); + TPartialReadResult out(table, lastKey, 
std::nullopt); + return std::move(out); + } + return std::nullopt; + } + + std::optional> ExtractStatsBatch() { + while (IndexGranules.size()) { + auto builders = NArrow::MakeBuilders(DataSchema, PredictRecordsCount(IndexGranules.front())); + if (!AppendStats(builders, IndexGranules.front())) { + IndexGranules.pop_front(); + } + auto columns = NArrow::Finish(std::move(builders)); + AFL_VERIFY(columns.size()); + std::optional count; + for (auto&& i : columns) { + if (!count) { + count = i->length(); + } else { + AFL_VERIFY(*count == i->length()); + } + } + auto result = arrow::RecordBatch::Make(DataSchema, columns.front()->length(), columns); + if (result->num_rows()) { + return result; + } + } + return std::nullopt; + } + + + TStatsIteratorBase(const NAbstract::TReadStatsMetadata::TConstPtr& readMetadata, const NTable::TScheme::TTableSchema& statsSchema) + : StatsSchema(statsSchema) + , ReadMetadata(readMetadata) + , KeySchema(MakeArrowSchema(StatsSchema.Columns, StatsSchema.KeyColumns)) + , ResultSchema(MakeArrowSchema(StatsSchema.Columns, ReadMetadata->ResultColumnIds)) + , IndexGranules(ReadMetadata->IndexGranules) + { + if (ResultSchema->num_fields() == 0) { + ResultSchema = KeySchema; + } + std::vector allColumnIds; + for (const auto& c : StatsSchema.Columns) { + allColumnIds.push_back(c.second.Id); + } + std::sort(allColumnIds.begin(), allColumnIds.end()); + DataSchema = MakeArrowSchema(StatsSchema.Columns, allColumnIds); + } +}; + +template +class TStatsIterator : public TStatsIteratorBase { +private: + using TBase = TStatsIteratorBase; +public: + static inline const NTable::TScheme::TTableSchema StatsSchema = []() { + NTable::TScheme::TTableSchema schema; + NIceDb::NHelpers::TStaticSchemaFiller::Fill(schema); + return schema; + }(); + + class TStatsColumnResolver: public IColumnResolver { + public: + TString GetColumnName(ui32 id, bool required) const override { + auto it = StatsSchema.Columns.find(id); + if (it == StatsSchema.Columns.end()) { + 
Y_ABORT_UNLESS(!required, "No column '%" PRIu32 "' in primary_index_stats", id); + return {}; + } + return it->second.Name; + } + + std::optional GetColumnIdOptional(const TString& name) const override { + auto it = StatsSchema.ColumnNames.find(name); + if (it == StatsSchema.ColumnNames.end()) { + return {}; + } else { + return it->second; + } + } + + const NTable::TScheme::TTableSchema& GetSchema() const override { + return StatsSchema; + } + + NSsa::TColumnInfo GetDefaultColumn() const override { + return NSsa::TColumnInfo::Original(1, "PathId"); + } + }; + + TStatsIterator(const NAbstract::TReadStatsMetadata::TConstPtr& readMetadata) + : TBase(readMetadata, StatsSchema) + { + } + +}; + +} diff --git a/ydb/core/tx/columnshard/engines/reader/sys_view/abstract/metadata.cpp b/ydb/core/tx/columnshard/engines/reader/sys_view/abstract/metadata.cpp new file mode 100644 index 000000000000..c3008a99032a --- /dev/null +++ b/ydb/core/tx/columnshard/engines/reader/sys_view/abstract/metadata.cpp @@ -0,0 +1,5 @@ +#include "metadata.h" + +namespace NKikimr::NOlap::NReader::NSysView::NAbstract { + +} diff --git a/ydb/core/tx/columnshard/engines/reader/sys_view/abstract/metadata.h b/ydb/core/tx/columnshard/engines/reader/sys_view/abstract/metadata.h new file mode 100644 index 000000000000..c5068be3c82f --- /dev/null +++ b/ydb/core/tx/columnshard/engines/reader/sys_view/abstract/metadata.h @@ -0,0 +1,26 @@ +#pragma once +#include "granule_view.h" +#include +#include + +namespace NKikimr::NOlap::NReader::NSysView::NAbstract { + +struct TReadStatsMetadata: public TReadMetadataBase { +private: + using TBase = TReadMetadataBase; +public: + using TConstPtr = std::shared_ptr; + + const ui64 TabletId; + std::vector ReadColumnIds; + std::vector ResultColumnIds; + std::deque IndexGranules; + + explicit TReadStatsMetadata(const std::shared_ptr& info, ui64 tabletId, const ESorting sorting, + const TProgramContainer& ssaProgram, const std::shared_ptr& schema, const TSnapshot& requestSnapshot) 
+ : TBase(info, sorting, ssaProgram, schema, requestSnapshot) + , TabletId(tabletId) { + } +}; + +} diff --git a/ydb/core/tx/columnshard/engines/reader/sys_view/abstract/ya.make b/ydb/core/tx/columnshard/engines/reader/sys_view/abstract/ya.make new file mode 100644 index 000000000000..000edfb62e37 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/reader/sys_view/abstract/ya.make @@ -0,0 +1,14 @@ +LIBRARY() + +PEERDIR( + ydb/core/tx/columnshard/engines/reader/abstract +) + +SRCS( + iterator.cpp + metadata.cpp + granule_view.cpp +) + +END() + diff --git a/ydb/core/tx/columnshard/engines/reader/sys_view/chunks/chunks.cpp b/ydb/core/tx/columnshard/engines/reader/sys_view/chunks/chunks.cpp new file mode 100644 index 000000000000..1aeb83b4ea24 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/reader/sys_view/chunks/chunks.cpp @@ -0,0 +1,110 @@ +#include "chunks.h" +#include +#include + +namespace NKikimr::NOlap::NReader::NSysView::NChunks { + +void TStatsIterator::AppendStats(const std::vector>& builders, const TPortionInfo& portion) const { + auto portionSchema = ReadMetadata->GetLoadSchemaVerified(portion); + const std::string prod = ::ToString(portion.GetMeta().Produced); + const bool activity = !portion.IsRemovedFor(ReadMetadata->GetRequestSnapshot()); + { + std::vector records; + for (auto&& r : portion.Records) { + records.emplace_back(&r); + } + if (Reverse) { + std::reverse(records.begin(), records.end()); + } + for (auto&& r : records) { + NArrow::Append(*builders[0], portion.GetPathId()); + NArrow::Append(*builders[1], prod); + NArrow::Append(*builders[2], ReadMetadata->TabletId); + NArrow::Append(*builders[3], r->GetMeta().GetNumRows()); + NArrow::Append(*builders[4], r->GetMeta().GetRawBytes()); + NArrow::Append(*builders[5], portion.GetPortionId()); + NArrow::Append(*builders[6], r->GetChunkIdx()); + NArrow::Append(*builders[7], ReadMetadata->GetColumnNameDef(r->GetColumnId()).value_or("undefined")); + NArrow::Append(*builders[8], r->GetColumnId()); + 
std::string blobIdString = portion.GetBlobId(r->GetBlobRange().GetBlobIdxVerified()).ToStringLegacy(); + NArrow::Append(*builders[9], blobIdString); + NArrow::Append(*builders[10], r->BlobRange.Offset); + NArrow::Append(*builders[11], r->BlobRange.Size); + NArrow::Append(*builders[12], activity); + + const auto tierName = portionSchema->GetIndexInfo().GetEntityStorageId(r->GetColumnId(), portion.GetMeta().GetTierName()); + std::string strTierName(tierName.data(), tierName.size()); + NArrow::Append(*builders[13], strTierName); + NArrow::Append(*builders[14], "COL"); + } + } + { + std::vector indexes; + for (auto&& r : portion.GetIndexes()) { + indexes.emplace_back(&r); + } + if (Reverse) { + std::reverse(indexes.begin(), indexes.end()); + } + for (auto&& r : indexes) { + NArrow::Append(*builders[0], portion.GetPathId()); + NArrow::Append(*builders[1], prod); + NArrow::Append(*builders[2], ReadMetadata->TabletId); + NArrow::Append(*builders[3], r->GetRecordsCount()); + NArrow::Append(*builders[4], r->GetRawBytes()); + NArrow::Append(*builders[5], portion.GetPortionId()); + NArrow::Append(*builders[6], r->GetChunkIdx()); + NArrow::Append(*builders[7], ReadMetadata->GetEntityName(r->GetIndexId()).value_or("undefined")); + NArrow::Append(*builders[8], r->GetIndexId()); + std::string blobIdString = portion.GetBlobId(r->GetBlobRange().GetBlobIdxVerified()).ToStringLegacy(); + NArrow::Append(*builders[9], blobIdString); + NArrow::Append(*builders[10], r->GetBlobRange().Offset); + NArrow::Append(*builders[11], r->GetBlobRange().Size); + NArrow::Append(*builders[12], activity); + const auto tierName = portionSchema->GetIndexInfo().GetEntityStorageId(r->GetIndexId(), portion.GetMeta().GetTierName()); + std::string strTierName(tierName.data(), tierName.size()); + NArrow::Append(*builders[13], strTierName); + NArrow::Append(*builders[14], "IDX"); + } + } +} + +std::unique_ptr TReadStatsMetadata::StartScan(const std::shared_ptr& readContext) const { + return 
std::make_unique(readContext->GetReadMetadataPtrVerifiedAs()); +} + +std::vector> TReadStatsMetadata::GetKeyYqlSchema() const { + return GetColumns(TStatsIterator::StatsSchema, TStatsIterator::StatsSchema.KeyColumns); +} + +std::shared_ptr TConstructor::BuildMetadata(const NColumnShard::TColumnShard* self, const TReadDescription& read) const { + auto* index = self->GetIndexOptional(); + return std::make_shared(index ? index->CopyVersionedIndexPtr() : nullptr, self->TabletID(), + IsReverse ? TReadMetadataBase::ESorting::DESC : TReadMetadataBase::ESorting::ASC, + read.GetProgram(), index ? index->GetVersionedIndex().GetLastSchema() : nullptr, read.GetSnapshot()); +} + +bool TStatsIterator::AppendStats(const std::vector>& builders, NAbstract::TGranuleMetaView& granule) const { + ui64 recordsCount = 0; + while (auto portion = granule.PopFrontPortion()) { + recordsCount += portion->GetRecords().size() + portion->GetIndexes().size(); + AppendStats(builders, *portion); + if (recordsCount > 10000) { + break; + } + } + return granule.GetPortions().size(); +} + +ui32 TStatsIterator::PredictRecordsCount(const NAbstract::TGranuleMetaView& granule) const { + ui32 recordsCount = 0; + for (auto&& portion : granule.GetPortions()) { + recordsCount += portion->GetRecords().size() + portion->GetIndexes().size(); + if (recordsCount > 10000) { + break; + } + } + return recordsCount; +} + +} diff --git a/ydb/core/tx/columnshard/engines/reader/sys_view/chunks/chunks.h b/ydb/core/tx/columnshard/engines/reader/sys_view/chunks/chunks.h new file mode 100644 index 000000000000..0d6fd6618560 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/reader/sys_view/chunks/chunks.h @@ -0,0 +1,39 @@ +#pragma once +#include +#include +#include +#include + +namespace NKikimr::NOlap::NReader::NSysView::NChunks { + +class TConstructor: public TStatScannerConstructor { +private: + using TBase = TStatScannerConstructor; +protected: + virtual std::shared_ptr BuildMetadata(const NColumnShard::TColumnShard* 
self, const TReadDescription& read) const override; +public: + using TBase::TBase; +}; + +class TReadStatsMetadata: public NAbstract::TReadStatsMetadata, std::enable_shared_from_this { +private: + using TBase = NAbstract::TReadStatsMetadata; + using TSysViewSchema = NKikimr::NSysView::Schema::PrimaryIndexStats; +public: + using TBase::TBase; + + virtual std::unique_ptr StartScan(const std::shared_ptr& /*readContext*/) const override; + virtual std::vector> GetKeyYqlSchema() const override; +}; + +class TStatsIterator: public NAbstract::TStatsIterator { +private: + using TBase = NAbstract::TStatsIterator; + virtual bool AppendStats(const std::vector>& builders, NAbstract::TGranuleMetaView& granule) const override; + virtual ui32 PredictRecordsCount(const NAbstract::TGranuleMetaView& granule) const override; + void AppendStats(const std::vector>& builders, const TPortionInfo& portion) const; +public: + using TBase::TBase; +}; + +} diff --git a/ydb/core/tx/columnshard/engines/reader/sys_view/chunks/ya.make b/ydb/core/tx/columnshard/engines/reader/sys_view/chunks/ya.make new file mode 100644 index 000000000000..70b3aa79df54 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/reader/sys_view/chunks/ya.make @@ -0,0 +1,12 @@ +LIBRARY() + +PEERDIR( + ydb/core/tx/columnshard/engines/reader/sys_view/abstract +) + +SRCS( + chunks.cpp +) + +END() + diff --git a/ydb/core/tx/columnshard/engines/reader/sys_view/constructor/constructor.cpp b/ydb/core/tx/columnshard/engines/reader/sys_view/constructor/constructor.cpp new file mode 100644 index 000000000000..6101174764da --- /dev/null +++ b/ydb/core/tx/columnshard/engines/reader/sys_view/constructor/constructor.cpp @@ -0,0 +1,5 @@ +#include "constructor.h" + +namespace NKikimr::NOlap::NReader::NSysView { + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/reader/sys_view/constructor/constructor.h b/ydb/core/tx/columnshard/engines/reader/sys_view/constructor/constructor.h new file mode 100644 index 
000000000000..e69c36193a4e --- /dev/null +++ b/ydb/core/tx/columnshard/engines/reader/sys_view/constructor/constructor.h @@ -0,0 +1,85 @@ +#pragma once +#include +#include +#include +#include +#include + +namespace NKikimr::NOlap::NReader::NSysView { + +template +class TStatScannerConstructor: public IScannerConstructor { +private: + using TBase = IScannerConstructor; + + virtual std::shared_ptr BuildMetadata(const NColumnShard::TColumnShard* self, const TReadDescription& read) const = 0; + + virtual TConclusion> DoBuildReadMetadata(const NColumnShard::TColumnShard* self, const TReadDescription& read) const override { + THashSet readColumnIds(read.ColumnIds.begin(), read.ColumnIds.end()); + for (auto& [id, name] : read.GetProgram().GetSourceColumns()) { + readColumnIds.insert(id); + } + + for (ui32 colId : readColumnIds) { + if (!NAbstract::TStatsIterator::StatsSchema.Columns.contains(colId)) { + return TConclusionStatus::Fail(Sprintf("Columnd id %" PRIu32 " not found", colId)); + } + } + + auto out = BuildMetadata(self, read); + + out->SetPKRangesFilter(read.PKRangesFilter); + out->ReadColumnIds.assign(readColumnIds.begin(), readColumnIds.end()); + out->ResultColumnIds = read.ColumnIds; + + const TColumnEngineForLogs* logsIndex = dynamic_cast(self->GetIndexOptional()); + if (!logsIndex) { + return dynamic_pointer_cast(out); + } + THashSet pathIds; + for (auto&& filter : read.PKRangesFilter) { + const ui64 fromPathId = *filter.GetPredicateFrom().Get(0, 0, 1); + const ui64 toPathId = *filter.GetPredicateTo().Get(0, 0, Max()); + if (read.TableName.EndsWith(IIndexInfo::TABLE_INDEX_STATS_TABLE) + || read.TableName.EndsWith(IIndexInfo::TABLE_INDEX_PORTION_STATS_TABLE) + || read.TableName.EndsWith(IIndexInfo::TABLE_INDEX_GRANULE_STATS_TABLE) + ) { + if (fromPathId <= read.PathId && read.PathId <= toPathId) { + auto pathInfo = logsIndex->GetGranuleOptional(read.PathId); + if (!pathInfo) { + continue; + } + if (pathIds.emplace(pathInfo->GetPathId()).second) { + 
out->IndexGranules.emplace_back(NAbstract::TGranuleMetaView(*pathInfo, out->IsDescSorted())); + } + } + } else if (read.TableName.EndsWith(IIndexInfo::STORE_INDEX_STATS_TABLE) + || read.TableName.EndsWith(IIndexInfo::STORE_INDEX_PORTION_STATS_TABLE) + || read.TableName.EndsWith(IIndexInfo::STORE_INDEX_GRANULE_STATS_TABLE) + ) { + auto pathInfos = logsIndex->GetTables(fromPathId, toPathId); + for (auto&& pathInfo : pathInfos) { + if (pathIds.emplace(pathInfo->GetPathId()).second) { + out->IndexGranules.emplace_back(NAbstract::TGranuleMetaView(*pathInfo, out->IsDescSorted())); + } + } + } + } + std::sort(out->IndexGranules.begin(), out->IndexGranules.end()); + if (out->IsDescSorted()) { + std::reverse(out->IndexGranules.begin(), out->IndexGranules.end()); + } + return dynamic_pointer_cast(out); + } +public: + using TBase::TBase; + virtual TConclusionStatus ParseProgram(const TVersionedIndex* vIndex, const NKikimrTxDataShard::TEvKqpScan& proto, TReadDescription& read) const override { + typename NAbstract::TStatsIterator::TStatsColumnResolver columnResolver; + return TBase::ParseProgram(vIndex, proto.GetOlapProgramType(), proto.GetOlapProgram(), read, columnResolver); + } + virtual std::vector GetPrimaryKeyScheme(const NColumnShard::TColumnShard* /*self*/) const override { + return GetColumns(NAbstract::TStatsIterator::StatsSchema, NAbstract::TStatsIterator::StatsSchema.KeyColumns); + } +}; + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/reader/sys_view/constructor/ya.make b/ydb/core/tx/columnshard/engines/reader/sys_view/constructor/ya.make new file mode 100644 index 000000000000..bee096800cde --- /dev/null +++ b/ydb/core/tx/columnshard/engines/reader/sys_view/constructor/ya.make @@ -0,0 +1,11 @@ +LIBRARY() + +SRCS( + constructor.cpp +) + +PEERDIR( + ydb/core/tx/columnshard/engines/reader/abstract +) + +END() diff --git a/ydb/core/tx/columnshard/engines/reader/sys_view/granules/granules.cpp 
b/ydb/core/tx/columnshard/engines/reader/sys_view/granules/granules.cpp new file mode 100644 index 000000000000..4ab8e7ad2bbc --- /dev/null +++ b/ydb/core/tx/columnshard/engines/reader/sys_view/granules/granules.cpp @@ -0,0 +1,33 @@ +#include "granules.h" +#include +#include +#include +#include + +namespace NKikimr::NOlap::NReader::NSysView::NGranules { + +bool TStatsIterator::AppendStats(const std::vector>& builders, NAbstract::TGranuleMetaView& granule) const { + NArrow::Append(*builders[0], granule.GetPathId()); + NArrow::Append(*builders[1], ReadMetadata->TabletId); + NArrow::Append(*builders[2], granule.GetPortions().size()); + NArrow::Append(*builders[3], HostNameField); + NArrow::Append(*builders[4], NActors::TActivationContext::AsActorContext().SelfID.NodeId()); + return false; +} + +std::unique_ptr TReadStatsMetadata::StartScan(const std::shared_ptr& readContext) const { + return std::make_unique(readContext->GetReadMetadataPtrVerifiedAs()); +} + +std::vector> TReadStatsMetadata::GetKeyYqlSchema() const { + return GetColumns(TStatsIterator::StatsSchema, TStatsIterator::StatsSchema.KeyColumns); +} + +std::shared_ptr TConstructor::BuildMetadata(const NColumnShard::TColumnShard* self, const TReadDescription& read) const { + auto* index = self->GetIndexOptional(); + return std::make_shared(index ? index->CopyVersionedIndexPtr() : nullptr, self->TabletID(), + IsReverse ? TReadMetadataBase::ESorting::DESC : TReadMetadataBase::ESorting::ASC, + read.GetProgram(), index ? 
index->GetVersionedIndex().GetLastSchema() : nullptr, read.GetSnapshot()); +} + +} diff --git a/ydb/core/tx/columnshard/engines/reader/sys_view/granules/granules.h b/ydb/core/tx/columnshard/engines/reader/sys_view/granules/granules.h new file mode 100644 index 000000000000..742f1e1be560 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/reader/sys_view/granules/granules.h @@ -0,0 +1,42 @@ +#pragma once +#include +#include +#include +#include + +namespace NKikimr::NOlap::NReader::NSysView::NGranules { + +class TConstructor: public TStatScannerConstructor { +private: + using TBase = TStatScannerConstructor; +protected: + virtual std::shared_ptr BuildMetadata(const NColumnShard::TColumnShard* self, const TReadDescription& read) const override; + +public: + using TBase::TBase; +}; + +struct TReadStatsMetadata: public NAbstract::TReadStatsMetadata { +private: + using TBase = NAbstract::TReadStatsMetadata; + using TSysViewSchema = NKikimr::NSysView::Schema::PrimaryIndexGranuleStats; +public: + using TBase::TBase; + + virtual std::unique_ptr StartScan(const std::shared_ptr& readContext) const override; + virtual std::vector> GetKeyYqlSchema() const override; +}; + +class TStatsIterator : public NAbstract::TStatsIterator { +private: + const std::string HostNameField = HostName(); + using TBase = NAbstract::TStatsIterator; + virtual ui32 PredictRecordsCount(const NAbstract::TGranuleMetaView& /*granule*/) const override { + return 1; + } + virtual bool AppendStats(const std::vector>& builders, NAbstract::TGranuleMetaView& granule) const override; +public: + using TBase::TBase; +}; + +} diff --git a/ydb/core/tx/columnshard/engines/reader/sys_view/granules/ya.make b/ydb/core/tx/columnshard/engines/reader/sys_view/granules/ya.make new file mode 100644 index 000000000000..390364b3e64a --- /dev/null +++ b/ydb/core/tx/columnshard/engines/reader/sys_view/granules/ya.make @@ -0,0 +1,12 @@ +LIBRARY() + +PEERDIR( + ydb/core/tx/columnshard/engines/reader/sys_view/abstract +) + +SRCS( 
+ granules.cpp +) + +END() + diff --git a/ydb/core/tx/columnshard/engines/reader/sys_view/portions/portions.cpp b/ydb/core/tx/columnshard/engines/reader/sys_view/portions/portions.cpp new file mode 100644 index 000000000000..76f1bdda2c7c --- /dev/null +++ b/ydb/core/tx/columnshard/engines/reader/sys_view/portions/portions.cpp @@ -0,0 +1,58 @@ +#include "portions.h" +#include +#include +#include + +namespace NKikimr::NOlap::NReader::NSysView::NPortions { + +void TStatsIterator::AppendStats(const std::vector>& builders, const TPortionInfo& portion) const { + NArrow::Append(*builders[0], portion.GetPathId()); + const std::string prod = ::ToString(portion.GetMeta().Produced); + NArrow::Append(*builders[1], prod); + NArrow::Append(*builders[2], ReadMetadata->TabletId); + NArrow::Append(*builders[3], portion.NumRows()); + NArrow::Append(*builders[4], portion.GetColumnRawBytes()); + NArrow::Append(*builders[5], portion.GetIndexRawBytes()); + NArrow::Append(*builders[6], portion.GetColumnBlobBytes()); + NArrow::Append(*builders[7], portion.GetIndexBlobBytes()); + NArrow::Append(*builders[8], portion.GetPortionId()); + NArrow::Append(*builders[9], !portion.IsRemovedFor(ReadMetadata->GetRequestSnapshot())); + + auto tierName = portion.GetTierNameDef(NBlobOperations::TGlobal::DefaultStorageId); + NArrow::Append(*builders[10], arrow::util::string_view(tierName.data(), tierName.size())); + auto statInfo = portion.GetMeta().GetStatisticsStorage().SerializeToProto().DebugString(); + NArrow::Append(*builders[11], arrow::util::string_view(statInfo.data(), statInfo.size())); +} + +ui32 TStatsIterator::PredictRecordsCount(const NAbstract::TGranuleMetaView& granule) const { + return std::min(10000, granule.GetPortions().size()); +} + +bool TStatsIterator::AppendStats(const std::vector>& builders, NAbstract::TGranuleMetaView& granule) const { + ui64 recordsCount = 0; + while (auto portion = granule.PopFrontPortion()) { + recordsCount += 1; + AppendStats(builders, *portion); + if 
(recordsCount >= 10000) { + break; + } + } + return granule.GetPortions().size(); +} + +std::unique_ptr TReadStatsMetadata::StartScan(const std::shared_ptr& readContext) const { + return std::make_unique(readContext->GetReadMetadataPtrVerifiedAs()); +} + +std::vector> TReadStatsMetadata::GetKeyYqlSchema() const { + return GetColumns(TStatsIterator::StatsSchema, TStatsIterator::StatsSchema.KeyColumns); +} + +std::shared_ptr TConstructor::BuildMetadata(const NColumnShard::TColumnShard* self, const TReadDescription& read) const { + auto* index = self->GetIndexOptional(); + return std::make_shared(index ? index->CopyVersionedIndexPtr() : nullptr, self->TabletID(), + IsReverse ? TReadMetadataBase::ESorting::DESC : TReadMetadataBase::ESorting::ASC, + read.GetProgram(), index ? index->GetVersionedIndex().GetLastSchema() : nullptr, read.GetSnapshot()); +} + +} diff --git a/ydb/core/tx/columnshard/engines/reader/sys_view/portions/portions.h b/ydb/core/tx/columnshard/engines/reader/sys_view/portions/portions.h new file mode 100644 index 000000000000..8bcb8080ad3b --- /dev/null +++ b/ydb/core/tx/columnshard/engines/reader/sys_view/portions/portions.h @@ -0,0 +1,39 @@ +#pragma once +#include +#include +#include + +namespace NKikimr::NOlap::NReader::NSysView::NPortions { + +class TConstructor: public TStatScannerConstructor { +private: + using TBase = TStatScannerConstructor; +protected: + virtual std::shared_ptr BuildMetadata(const NColumnShard::TColumnShard* self, const TReadDescription& read) const override; + +public: + using TBase::TBase; +}; + +struct TReadStatsMetadata: public NAbstract::TReadStatsMetadata { +private: + using TBase = NAbstract::TReadStatsMetadata; + using TSysViewSchema = NKikimr::NSysView::Schema::PrimaryIndexPortionStats; +public: + using TBase::TBase; + + virtual std::unique_ptr StartScan(const std::shared_ptr& readContext) const override; + virtual std::vector> GetKeyYqlSchema() const override; +}; + +class TStatsIterator : public 
NAbstract::TStatsIterator { +private: + using TBase = NAbstract::TStatsIterator; + virtual bool AppendStats(const std::vector>& builders, NAbstract::TGranuleMetaView& granule) const override; + virtual ui32 PredictRecordsCount(const NAbstract::TGranuleMetaView& granule) const override; + void AppendStats(const std::vector>& builders, const TPortionInfo& portion) const; +public: + using TBase::TBase; +}; + +} diff --git a/ydb/core/tx/columnshard/engines/reader/sys_view/portions/ya.make b/ydb/core/tx/columnshard/engines/reader/sys_view/portions/ya.make new file mode 100644 index 000000000000..0f1cab2459c6 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/reader/sys_view/portions/ya.make @@ -0,0 +1,12 @@ +LIBRARY() + +PEERDIR( + ydb/core/tx/columnshard/engines/reader/sys_view/abstract +) + +SRCS( + portions.cpp +) + +END() + diff --git a/ydb/core/tx/columnshard/engines/reader/sys_view/ya.make b/ydb/core/tx/columnshard/engines/reader/sys_view/ya.make new file mode 100644 index 000000000000..d3fdaa984fa4 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/reader/sys_view/ya.make @@ -0,0 +1,15 @@ +LIBRARY() + +PEERDIR( + ydb/core/tx/columnshard/engines/reader/sys_view/abstract + ydb/core/tx/columnshard/engines/reader/sys_view/portions + ydb/core/tx/columnshard/engines/reader/sys_view/chunks + ydb/core/tx/columnshard/engines/reader/sys_view/constructor + ydb/core/tx/columnshard/engines/reader/sys_view/granules +) + +SRCS( +) + +END() + diff --git a/ydb/core/tx/columnshard/engines/reader/transaction/tx_scan.cpp b/ydb/core/tx/columnshard/engines/reader/transaction/tx_scan.cpp new file mode 100644 index 000000000000..0d054056e39d --- /dev/null +++ b/ydb/core/tx/columnshard/engines/reader/transaction/tx_scan.cpp @@ -0,0 +1,274 @@ +#include "tx_scan.h" +#include +#include +#include +#include +#include +#include +#include +#include + +namespace NKikimr::NOlap::NReader { + +std::vector ExtractTypes(const std::vector>& columns) { + std::vector types; + 
types.reserve(columns.size()); + for (auto& [name, type] : columns) { + types.push_back(type); + } + return types; +} + +TString FromCells(const TConstArrayRef& cells, const std::vector>& columns) { + Y_ABORT_UNLESS(cells.size() == columns.size()); + if (cells.empty()) { + return {}; + } + + std::vector types = ExtractTypes(columns); + + NArrow::TArrowBatchBuilder batchBuilder; + batchBuilder.Reserve(1); + auto startStatus = batchBuilder.Start(columns); + Y_ABORT_UNLESS(startStatus.ok(), "%s", startStatus.ToString().c_str()); + + batchBuilder.AddRow(NKikimr::TDbTupleRef(), NKikimr::TDbTupleRef(types.data(), cells.data(), cells.size())); + + auto batch = batchBuilder.FlushBatch(false); + Y_ABORT_UNLESS(batch); + Y_ABORT_UNLESS(batch->num_columns() == (int)cells.size()); + Y_ABORT_UNLESS(batch->num_rows() == 1); + return NArrow::SerializeBatchNoCompression(batch); +} + +std::pair RangePredicates(const TSerializedTableRange& range, const std::vector>& columns) { + std::vector leftCells; + std::vector> leftColumns; + bool leftTrailingNull = false; + { + TConstArrayRef cells = range.From.GetCells(); + const size_t size = cells.size(); + Y_ASSERT(size <= columns.size()); + leftCells.reserve(size); + leftColumns.reserve(size); + for (size_t i = 0; i < size; ++i) { + if (!cells[i].IsNull()) { + leftCells.push_back(cells[i]); + leftColumns.push_back(columns[i]); + leftTrailingNull = false; + } else { + leftTrailingNull = true; + } + } + } + + std::vector rightCells; + std::vector> rightColumns; + bool rightTrailingNull = false; + { + TConstArrayRef cells = range.To.GetCells(); + const size_t size = cells.size(); + Y_ASSERT(size <= columns.size()); + rightCells.reserve(size); + rightColumns.reserve(size); + for (size_t i = 0; i < size; ++i) { + if (!cells[i].IsNull()) { + rightCells.push_back(cells[i]); + rightColumns.push_back(columns[i]); + rightTrailingNull = false; + } else { + rightTrailingNull = true; + } + } + } + + const bool fromInclusive = range.FromInclusive || 
leftTrailingNull; + const bool toInclusive = range.ToInclusive && !rightTrailingNull; + + TString leftBorder = FromCells(leftCells, leftColumns); + TString rightBorder = FromCells(rightCells, rightColumns); + auto leftSchema = NArrow::MakeArrowSchema(leftColumns); + Y_ASSERT(leftSchema.ok()); + auto rightSchema = NArrow::MakeArrowSchema(rightColumns); + Y_ASSERT(rightSchema.ok()); + return std::make_pair( + TPredicate(fromInclusive ? NKernels::EOperation::GreaterEqual : NKernels::EOperation::Greater, leftBorder, leftSchema.ValueUnsafe()), + TPredicate(toInclusive ? NKernels::EOperation::LessEqual : NKernels::EOperation::Less, rightBorder, rightSchema.ValueUnsafe())); +} + +static bool FillPredicatesFromRange(TReadDescription& read, const ::NKikimrTx::TKeyRange& keyRange, + const std::vector>& ydbPk, ui64 tabletId, const TIndexInfo* indexInfo, TString& error) { + TSerializedTableRange range(keyRange); + auto fromPredicate = std::make_shared(); + auto toPredicate = std::make_shared(); + std::tie(*fromPredicate, *toPredicate) = RangePredicates(range, ydbPk); + + LOG_S_DEBUG("TTxScan range predicate. 
From key size: " << range.From.GetCells().size() + << " To key size: " << range.To.GetCells().size() + << " greater predicate over columns: " << fromPredicate->ToString() + << " less predicate over columns: " << toPredicate->ToString() + << " at tablet " << tabletId); + + if (!read.PKRangesFilter.Add(fromPredicate, toPredicate, indexInfo)) { + error = "Error building filter"; + return false; + } + return true; +} + +bool TTxScan::Execute(TTransactionContext& /*txc*/, const TActorContext& /*ctx*/) { + TMemoryProfileGuard mpg("TTxScan::Execute"); + auto& record = Ev->Get()->Record; + TSnapshot snapshot(record.GetSnapshot().GetStep(), record.GetSnapshot().GetTxId()); + const auto scanId = record.GetScanId(); + const ui64 txId = record.GetTxId(); + + LOG_S_DEBUG("TTxScan prepare txId: " << txId << " scanId: " << scanId << " at tablet " << Self->TabletID()); + + TReadDescription read(snapshot, record.GetReverse()); + read.PathId = record.GetLocalPathId(); + read.ReadNothing = !Self->TablesManager.HasTable(read.PathId); + read.TableName = record.GetTablePath(); + bool isIndex = false; + std::unique_ptr scannerConstructor = [&]() { + const ui64 itemsLimit = record.HasItemsLimit() ? 
record.GetItemsLimit() : 0; + if (read.TableName.EndsWith(TIndexInfo::STORE_INDEX_STATS_TABLE) || + read.TableName.EndsWith(TIndexInfo::TABLE_INDEX_STATS_TABLE)) { + return std::unique_ptr(new NSysView::NChunks::TConstructor(snapshot, itemsLimit, record.GetReverse())); + } + if (read.TableName.EndsWith(TIndexInfo::STORE_INDEX_PORTION_STATS_TABLE) || + read.TableName.EndsWith(TIndexInfo::TABLE_INDEX_PORTION_STATS_TABLE)) { + return std::unique_ptr(new NSysView::NPortions::TConstructor(snapshot, itemsLimit, record.GetReverse())); + } + if (read.TableName.EndsWith(TIndexInfo::STORE_INDEX_GRANULE_STATS_TABLE) || + read.TableName.EndsWith(TIndexInfo::TABLE_INDEX_GRANULE_STATS_TABLE)) { + return std::unique_ptr(new NSysView::NGranules::TConstructor(snapshot, itemsLimit, record.GetReverse())); + } + isIndex = true; + return std::unique_ptr(new NPlain::TIndexScannerConstructor(snapshot, itemsLimit, record.GetReverse())); + }(); + read.ColumnIds.assign(record.GetColumnTags().begin(), record.GetColumnTags().end()); + read.StatsMode = record.GetStatsMode(); + + const TVersionedIndex* vIndex = Self->GetIndexOptional() ? &Self->GetIndexOptional()->GetVersionedIndex() : nullptr; + auto parseResult = scannerConstructor->ParseProgram(vIndex, record, read); + if (!parseResult) { + ErrorDescription = parseResult.GetErrorMessage(); + return true; + } + + if (!record.RangesSize()) { + auto range = scannerConstructor->BuildReadMetadata(Self, read); + if (range.IsSuccess()) { + ReadMetadataRange = range.DetachResult(); + } else { + ErrorDescription = range.GetErrorMessage(); + } + return true; + } + + auto ydbKey = scannerConstructor->GetPrimaryKeyScheme(Self); + auto* indexInfo = (vIndex && isIndex) ? 
&vIndex->GetSchema(snapshot)->GetIndexInfo() : nullptr; + for (auto& range : record.GetRanges()) { + if (!FillPredicatesFromRange(read, range, ydbKey, Self->TabletID(), indexInfo, ErrorDescription)) { + ReadMetadataRange = nullptr; + return true; + } + } + { + auto newRange = scannerConstructor->BuildReadMetadata(Self, read); + if (!newRange) { + ErrorDescription = newRange.GetErrorMessage(); + ReadMetadataRange = nullptr; + return true; + } + ReadMetadataRange = newRange.DetachResult(); + } + AFL_VERIFY(ReadMetadataRange); + return true; +} + +template +struct TContainerPrinter { + const T& Ref; + + TContainerPrinter(const T& ref) + : Ref(ref) { + } + + friend IOutputStream& operator << (IOutputStream& out, const TContainerPrinter& cont) { + for (auto& ptr : cont.Ref) { + out << *ptr << " "; + } + return out; + } +}; + +void TTxScan::Complete(const TActorContext& ctx) { + TMemoryProfileGuard mpg("TTxScan::Complete"); + auto& request = Ev->Get()->Record; + auto scanComputeActor = Ev->Sender; + const auto& snapshot = request.GetSnapshot(); + const auto scanId = request.GetScanId(); + const ui64 txId = request.GetTxId(); + const ui32 scanGen = request.GetGeneration(); + TString table = request.GetTablePath(); + auto dataFormat = request.GetDataFormat(); + const TDuration timeout = TDuration::MilliSeconds(request.GetTimeoutMs()); + if (scanGen > 1) { + Self->IncCounter(NColumnShard::COUNTER_SCAN_RESTARTED); + } + const NActors::TLogContextGuard gLogging = NActors::TLogContextBuilder::Build() + ("tx_id", txId)("scan_id", scanId)("gen", scanGen)("table", table)("snapshot", snapshot)("tablet", Self->TabletID())("timeout", timeout); + + if (!ReadMetadataRange) { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "TTxScan failed")("reason", "no metadata"); + + auto ev = MakeHolder(scanGen, Self->TabletID()); + ev->Record.SetStatus(Ydb::StatusIds::BAD_REQUEST); + auto issue = NYql::YqlIssue({}, NYql::TIssuesIds::KIKIMR_BAD_REQUEST, TStringBuilder() + << "Table " << 
table << " (shard " << Self->TabletID() << ") scan failed, reason: " << (ErrorDescription.empty() ? TString("no metadata ranges") : ErrorDescription)); /* parentheses are required: operator<< binds tighter than ?:, otherwise the whole builder expression becomes the ternary condition and the "Table ... reason:" prefix is dropped */ + NYql::IssueToMessage(issue, ev->Record.MutableIssues()->Add()); + + ctx.Send(scanComputeActor, ev.Release()); + return; + } + TStringBuilder detailedInfo; + if (IS_LOG_PRIORITY_ENABLED(NActors::NLog::PRI_TRACE, NKikimrServices::TX_COLUMNSHARD)) { + detailedInfo << " read metadata: (" << *ReadMetadataRange << ")" << " req: " << request; + } + + const TVersionedIndex* index = nullptr; + if (Self->HasIndex()) { + index = &Self->GetIndexAs().GetVersionedIndex(); + } + const TConclusion requestCookie = Self->InFlightReadsTracker.AddInFlightRequest(ReadMetadataRange, index); + if (!requestCookie) { + AFL_ERROR(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "TTxScan failed")("reason", requestCookie.GetErrorMessage())("trace_details", detailedInfo); + auto ev = MakeHolder(scanGen, Self->TabletID()); + + ev->Record.SetStatus(Ydb::StatusIds::INTERNAL_ERROR); + auto issue = NYql::YqlIssue({}, NYql::TIssuesIds::KIKIMR_TEMPORARILY_UNAVAILABLE, TStringBuilder() + << "Table " << table << " (shard " << Self->TabletID() << ") scan failed, reason: " << requestCookie.GetErrorMessage()); + NYql::IssueToMessage(issue, ev->Record.MutableIssues()->Add()); + Self->ScanCounters.OnScanDuration(NColumnShard::TScanCounters::EStatusFinish::CannotAddInFlight, TDuration::Zero()); + ctx.Send(scanComputeActor, ev.Release()); + return; + } + auto statsDelta = Self->InFlightReadsTracker.GetSelectStatsDelta(); + + Self->IncCounter(NColumnShard::COUNTER_READ_INDEX_PORTIONS, statsDelta.Portions); + Self->IncCounter(NColumnShard::COUNTER_READ_INDEX_BLOBS, statsDelta.Blobs); + Self->IncCounter(NColumnShard::COUNTER_READ_INDEX_ROWS, statsDelta.Rows); + Self->IncCounter(NColumnShard::COUNTER_READ_INDEX_BYTES, statsDelta.Bytes); + + TComputeShardingPolicy shardingPolicy; + AFL_VERIFY(shardingPolicy.DeserializeFromProto(request.GetComputeShardingPolicy())); + + auto scanActor 
= ctx.Register(new TColumnShardScan(Self->SelfId(), scanComputeActor, Self->GetStoragesManager(), + shardingPolicy, scanId, txId, scanGen, *requestCookie, Self->TabletID(), timeout, ReadMetadataRange, dataFormat, Self->ScanCounters)); + + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD_SCAN)("event", "TTxScan started")("actor_id", scanActor)("trace_detailed", detailedInfo); +} + +} diff --git a/ydb/core/tx/columnshard/engines/reader/transaction/tx_scan.h b/ydb/core/tx/columnshard/engines/reader/transaction/tx_scan.h new file mode 100644 index 000000000000..2d9eb9619a64 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/reader/transaction/tx_scan.h @@ -0,0 +1,28 @@ +#pragma once +#include +#include +#include + +namespace NKikimr::NOlap::NReader { +class TTxScan: public NTabletFlatExecutor::TTransactionBase { +private: + using TBase = NTabletFlatExecutor::TTransactionBase; +public: + using TReadMetadataPtr = TReadMetadataBase::TConstPtr; + + TTxScan(NColumnShard::TColumnShard* self, TEvColumnShard::TEvScan::TPtr& ev) + : TBase(self) + , Ev(ev) { + } + + bool Execute(TTransactionContext& txc, const TActorContext& ctx) override; + void Complete(const TActorContext& ctx) override; + TTxType GetTxType() const override { return NColumnShard::TXTYPE_START_SCAN; } + +private: + TString ErrorDescription; + TEvColumnShard::TEvScan::TPtr Ev; + TReadMetadataPtr ReadMetadataRange; +}; + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/reader/transaction/ya.make b/ydb/core/tx/columnshard/engines/reader/transaction/ya.make new file mode 100644 index 000000000000..a8bc351fdebc --- /dev/null +++ b/ydb/core/tx/columnshard/engines/reader/transaction/ya.make @@ -0,0 +1,15 @@ +LIBRARY() + +SRCS( + tx_scan.cpp +) + +PEERDIR( + ydb/core/tx/columnshard/engines/reader/abstract + ydb/core/tablet_flat + ydb/core/tx/columnshard/engines/reader/actor + ydb/core/tx/columnshard/engines/reader/sys_view/constructor + 
ydb/core/tx/columnshard/engines/reader/plain_reader/constructor +) + +END() diff --git a/ydb/core/tx/columnshard/engines/reader/ya.make b/ydb/core/tx/columnshard/engines/reader/ya.make index f673de8200e8..c1a5dbd87327 100644 --- a/ydb/core/tx/columnshard/engines/reader/ya.make +++ b/ydb/core/tx/columnshard/engines/reader/ya.make @@ -1,12 +1,6 @@ LIBRARY() SRCS( - conveyor_task.cpp - description.cpp - queue.cpp - read_filter_merger.cpp - read_metadata.cpp - read_context.cpp ) PEERDIR( @@ -18,10 +12,12 @@ PEERDIR( ydb/core/tx/columnshard/resources ydb/core/tx/program ydb/core/tx/columnshard/engines/reader/plain_reader + ydb/core/tx/columnshard/engines/reader/sys_view + ydb/core/tx/columnshard/engines/reader/abstract + ydb/core/tx/columnshard/engines/reader/common + ydb/core/tx/columnshard/engines/reader/actor + ydb/core/tx/columnshard/engines/reader/transaction ydb/core/tx/columnshard/engines/scheme ) -GENERATE_ENUM_SERIALIZATION(read_metadata.h) -YQL_LAST_ABI_VERSION() - END() diff --git a/ydb/core/tx/columnshard/engines/scheme/abstract/index_info.cpp b/ydb/core/tx/columnshard/engines/scheme/abstract/index_info.cpp new file mode 100644 index 000000000000..d849f5c9bbff --- /dev/null +++ b/ydb/core/tx/columnshard/engines/scheme/abstract/index_info.cpp @@ -0,0 +1,20 @@ +#include "index_info.h" +#include +#include + +namespace NKikimr::NOlap { + +const TString IIndexInfo::STORE_INDEX_STATS_TABLE = TString("/") + NSysView::SysPathName + "/" + NSysView::StorePrimaryIndexStatsName; +const TString IIndexInfo::STORE_INDEX_PORTION_STATS_TABLE = TString("/") + NSysView::SysPathName + "/" + NSysView::StorePrimaryIndexPortionStatsName; +const TString IIndexInfo::STORE_INDEX_GRANULE_STATS_TABLE = TString("/") + NSysView::SysPathName + "/" + NSysView::StorePrimaryIndexGranuleStatsName; +const TString IIndexInfo::TABLE_INDEX_STATS_TABLE = TString("/") + NSysView::SysPathName + "/" + NSysView::TablePrimaryIndexStatsName; +const TString IIndexInfo::TABLE_INDEX_PORTION_STATS_TABLE = 
TString("/") + NSysView::SysPathName + "/" + NSysView::TablePrimaryIndexPortionStatsName; +const TString IIndexInfo::TABLE_INDEX_GRANULE_STATS_TABLE = TString("/") + NSysView::SysPathName + "/" + NSysView::TablePrimaryIndexGranuleStatsName; + +std::shared_ptr IIndexInfo::GetColumnLoaderVerified(const ui32 columnId) const { + auto result = GetColumnLoaderOptional(columnId); + AFL_VERIFY(result); + return result; +} + +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/scheme/abstract/index_info.h b/ydb/core/tx/columnshard/engines/scheme/abstract/index_info.h new file mode 100644 index 000000000000..343ad58746b1 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/scheme/abstract/index_info.h @@ -0,0 +1,39 @@ +#pragma once +#include "loader.h" + +#include + +namespace NKikimr::NOlap { + +class IIndexInfo { +public: + enum class ESpecialColumn: ui32 { + PLAN_STEP = NOlap::NPortion::TSpecialColumns::SPEC_COL_PLAN_STEP_INDEX, + TX_ID = NOlap::NPortion::TSpecialColumns::SPEC_COL_TX_ID_INDEX + }; + + static constexpr const char* SPEC_COL_PLAN_STEP = NOlap::NPortion::TSpecialColumns::SPEC_COL_PLAN_STEP; + static constexpr const char* SPEC_COL_TX_ID = NOlap::NPortion::TSpecialColumns::SPEC_COL_TX_ID; + static const TString STORE_INDEX_STATS_TABLE; + static const TString STORE_INDEX_PORTION_STATS_TABLE; + static const TString STORE_INDEX_GRANULE_STATS_TABLE; + static const TString TABLE_INDEX_STATS_TABLE; + static const TString TABLE_INDEX_PORTION_STATS_TABLE; + static const TString TABLE_INDEX_GRANULE_STATS_TABLE; + + static const std::vector& GetSpecialColumnNames() { + static const std::vector result = {std::string(SPEC_COL_PLAN_STEP), std::string(SPEC_COL_TX_ID)}; + return result; + } + + static const std::vector& GetSpecialColumnIds() { + static const std::vector result = {(ui32)ESpecialColumn::PLAN_STEP, (ui32)ESpecialColumn::TX_ID}; + return result; + } + + virtual std::shared_ptr GetColumnLoaderOptional(const ui32 columnId) const = 0; + 
std::shared_ptr GetColumnLoaderVerified(const ui32 columnId) const; + virtual ~IIndexInfo() = default; +}; + +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/scheme/abstract/loader.cpp b/ydb/core/tx/columnshard/engines/scheme/abstract/loader.cpp new file mode 100644 index 000000000000..a6f534c87190 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/scheme/abstract/loader.cpp @@ -0,0 +1,60 @@ +#include "loader.h" +#include + +namespace NKikimr::NOlap { + +TString TColumnLoader::DebugString() const { + TStringBuilder result; + if (ExpectedSchema) { + result << "schema:" << ExpectedSchema->ToString() << ";"; + } + if (Transformer) { + result << "transformer:" << Transformer->DebugString() << ";"; + } + if (Serializer) { + result << "serializer:" << Serializer->DebugString() << ";"; + } + return result; +} + +TColumnLoader::TColumnLoader(NArrow::NTransformation::ITransformer::TPtr transformer, const NArrow::NSerialization::TSerializerContainer& serializer, + const std::shared_ptr& expectedSchema, const ui32 columnId) + : Transformer(transformer) + , Serializer(serializer) + , ExpectedSchema(expectedSchema) + , ColumnId(columnId) { + Y_ABORT_UNLESS(ExpectedSchema); + auto fieldsCountStr = ::ToString(ExpectedSchema->num_fields()); + Y_ABORT_UNLESS(ExpectedSchema->num_fields() == 1, "%s", fieldsCountStr.data()); + Y_ABORT_UNLESS(Serializer); +} + +const std::shared_ptr& TColumnLoader::GetField() const { + return ExpectedSchema->field(0); +} + +arrow::Result> TColumnLoader::Apply(const TString& data) const { + Y_ABORT_UNLESS(Serializer); + arrow::Result> columnArray = + Transformer ? 
Serializer->Deserialize(data) : Serializer->Deserialize(data, ExpectedSchema); + if (!columnArray.ok()) { + return columnArray; + } + if (Transformer) { + return Transformer->Transform(*columnArray); + } else { + return columnArray; + } +} + +std::shared_ptr TColumnLoader::ApplyVerified(const TString& data) const { + return NArrow::TStatusValidator::GetValid(Apply(data)); +} + +std::shared_ptr TColumnLoader::ApplyVerifiedColumn(const TString& data) const { + auto rb = ApplyVerified(data); + AFL_VERIFY(rb->num_columns() == 1)("schema", rb->schema()->ToString()); + return rb->column(0); +} + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/scheme/abstract/loader.h b/ydb/core/tx/columnshard/engines/scheme/abstract/loader.h new file mode 100644 index 000000000000..306f3aa21671 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/scheme/abstract/loader.h @@ -0,0 +1,49 @@ +#pragma once +#include +#include +#include + +namespace NKikimr::NOlap { + +class TColumnLoader { +private: + NArrow::NTransformation::ITransformer::TPtr Transformer; + NArrow::NSerialization::TSerializerContainer Serializer; + std::shared_ptr ExpectedSchema; + const ui32 ColumnId; +public: + bool IsEqualTo(const TColumnLoader& item) const { + if (!!Transformer != !!item.Transformer) { + return false; + } else if (!!Transformer && !Transformer->IsEqualTo(*item.Transformer)) { + return false; + } + if (!Serializer.IsEqualTo(item.Serializer)) { + return false; + } + return true; + } + + TString DebugString() const; + + TColumnLoader(NArrow::NTransformation::ITransformer::TPtr transformer, const NArrow::NSerialization::TSerializerContainer& serializer, + const std::shared_ptr& expectedSchema, const ui32 columnId); + + ui32 GetColumnId() const { + return ColumnId; + } + + const std::shared_ptr& GetField() const; + + const std::shared_ptr& GetExpectedSchema() const { + return ExpectedSchema; + } + + arrow::Result> Apply(const TString& data) const; + + std::shared_ptr 
ApplyVerified(const TString& data) const; + + std::shared_ptr ApplyVerifiedColumn(const TString& data) const; +}; + +} diff --git a/ydb/core/tx/columnshard/engines/scheme/abstract/saver.cpp b/ydb/core/tx/columnshard/engines/scheme/abstract/saver.cpp new file mode 100644 index 000000000000..c15db92b8eec --- /dev/null +++ b/ydb/core/tx/columnshard/engines/scheme/abstract/saver.cpp @@ -0,0 +1,31 @@ +#include "saver.h" + +namespace NKikimr::NOlap { + +TColumnSaver::TColumnSaver(NArrow::NTransformation::ITransformer::TPtr transformer, const NArrow::NSerialization::TSerializerContainer serializer) + : Transformer(transformer) + , Serializer(serializer) +{ + Y_ABORT_UNLESS(Serializer); +} + +bool TColumnSaver::IsHardPacker() const { + return Serializer->IsHardPacker(); +} + +TString TColumnSaver::Apply(std::shared_ptr data, std::shared_ptr field) const { + auto schema = std::make_shared(arrow::FieldVector{field}); + auto batch = arrow::RecordBatch::Make(schema, data->length(), {data}); + return Apply(batch); +} + +TString TColumnSaver::Apply(const std::shared_ptr& data) const { + Y_ABORT_UNLESS(Serializer); + if (Transformer) { + return Serializer->SerializeFull(Transformer->Transform(data)); + } else { + return Serializer->SerializePayload(data); + } +} + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/scheme/abstract/saver.h b/ydb/core/tx/columnshard/engines/scheme/abstract/saver.h new file mode 100644 index 000000000000..c4d10c55a359 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/scheme/abstract/saver.h @@ -0,0 +1,34 @@ +#pragma once +#include +#include +#include +#include + +namespace NKikimr::NOlap { + +class TColumnSaver { +private: + NArrow::NTransformation::ITransformer::TPtr Transformer; + NArrow::NSerialization::TSerializerContainer Serializer; +public: + TColumnSaver() = default; + TColumnSaver(NArrow::NTransformation::ITransformer::TPtr transformer, const NArrow::NSerialization::TSerializerContainer serializer); + + void 
ResetSerializer(const NArrow::NSerialization::TSerializerContainer& serializer) { + AFL_VERIFY(serializer); + if (Serializer.IsCompatibleForExchange(serializer)) { + Serializer = serializer; + } else { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "cannot_reset_serializer")("reason", "incompatible_serializers"); + } + } + + bool IsHardPacker() const; + + TString Apply(std::shared_ptr data, std::shared_ptr field) const; + + TString Apply(const std::shared_ptr& data) const; +}; + + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/scheme/abstract/ya.make b/ydb/core/tx/columnshard/engines/scheme/abstract/ya.make new file mode 100644 index 000000000000..b830415daae1 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/scheme/abstract/ya.make @@ -0,0 +1,17 @@ +LIBRARY() + +SRCS( + saver.cpp + index_info.cpp + loader.cpp +) + +PEERDIR( + ydb/library/actors/core + ydb/core/formats/arrow/transformer + ydb/core/formats/arrow/serializer +) + +YQL_LAST_ABI_VERSION() + +END() diff --git a/ydb/core/tx/columnshard/engines/scheme/abstract_scheme.cpp b/ydb/core/tx/columnshard/engines/scheme/abstract_scheme.cpp index a80496460e90..e779afcaa956 100644 --- a/ydb/core/tx/columnshard/engines/scheme/abstract_scheme.cpp +++ b/ydb/core/tx/columnshard/engines/scheme/abstract_scheme.cpp @@ -1,119 +1 @@ #include "abstract_scheme.h" - -#include -#include - -namespace NKikimr::NOlap { - -std::shared_ptr ISnapshotSchema::GetFieldByIndex(const int index) const { - auto schema = GetSchema(); - if (!schema || index < 0 || index >= schema->num_fields()) { - return nullptr; - } - return schema->field(index); -} -std::shared_ptr ISnapshotSchema::GetFieldByColumnIdOptional(const ui32 columnId) const { - return GetFieldByIndex(GetFieldIndex(columnId)); -} - -std::set ISnapshotSchema::GetPkColumnsIds() const { - std::set result; - for (auto&& field : GetIndexInfo().GetReplaceKey()->fields()) { - result.emplace(GetColumnId(field->name())); - } - return result; - -} - 
-std::shared_ptr ISnapshotSchema::NormalizeBatch(const ISnapshotSchema& dataSchema, const std::shared_ptr batch) const { - if (dataSchema.GetSnapshot() == GetSnapshot()) { - return batch; - } - Y_ABORT_UNLESS(dataSchema.GetSnapshot() < GetSnapshot()); - const std::shared_ptr& resultArrowSchema = GetSchema(); - std::vector> newColumns; - newColumns.reserve(resultArrowSchema->num_fields()); - - for (size_t i = 0; i < resultArrowSchema->fields().size(); ++i) { - auto& resultField = resultArrowSchema->fields()[i]; - auto columnId = GetIndexInfo().GetColumnId(resultField->name()); - auto oldColumnIndex = dataSchema.GetFieldIndex(columnId); - if (oldColumnIndex >= 0) { // ColumnExists - auto oldColumnInfo = dataSchema.GetFieldByIndex(oldColumnIndex); - Y_ABORT_UNLESS(oldColumnInfo); - auto columnData = batch->GetColumnByName(oldColumnInfo->name()); - Y_ABORT_UNLESS(columnData); - newColumns.push_back(columnData); - } else { // AddNullColumn - auto nullColumn = NArrow::MakeEmptyBatch(arrow::schema({resultField}), batch->num_rows()); - newColumns.push_back(nullColumn->column(0)); - } - } - return arrow::RecordBatch::Make(resultArrowSchema, batch->num_rows(), newColumns); -} - -std::shared_ptr ISnapshotSchema::PrepareForInsert(const TString& data, const std::shared_ptr& dataSchema) const { - std::shared_ptr dstSchema = GetIndexInfo().ArrowSchema(); - auto batch = NArrow::DeserializeBatch(data, (dataSchema ? 
dataSchema : dstSchema)); - if (!batch) { - AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("error", "DeserializeBatch() failed"); - return nullptr; - } - if (batch->num_rows() == 0) { - AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("error", "empty batch"); - return nullptr; - } - - // Correct schema - if (dataSchema) { - batch = NArrow::ExtractColumns(batch, dstSchema, true); - if (!batch) { - AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("error", "cannot correct schema"); - return nullptr; - } - } - - if (!batch->schema()->Equals(dstSchema)) { - AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("error", TStringBuilder() << "unexpected schema for insert batch: '" << batch->schema()->ToString() << "'"); - return nullptr; - } - - const auto& sortingKey = GetIndexInfo().GetPrimaryKey(); - Y_ABORT_UNLESS(sortingKey); - - // Check PK is NOT NULL - for (auto& field : sortingKey->fields()) { - auto column = batch->GetColumnByName(field->name()); - if (!column) { - AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("error", TStringBuilder() << "missing PK column '" << field->name() << "'"); - return nullptr; - } - if (NArrow::HasNulls(column)) { - AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("error", TStringBuilder() << "PK column '" << field->name() << "' contains NULLs"); - return nullptr; - } - } - - auto status = batch->ValidateFull(); - if (!status.ok()) { - AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("error", status.ToString()); - return nullptr; - } - batch = NArrow::SortBatch(batch, sortingKey, true); - Y_DEBUG_ABORT_UNLESS(NArrow::IsSortedAndUnique(batch, sortingKey)); - return batch; -} - -ui32 ISnapshotSchema::GetColumnId(const std::string& columnName) const { - auto id = GetColumnIdOptional(columnName); - AFL_VERIFY(id)("column_name", columnName)("schema", JoinSeq(",", GetSchema()->field_names())); - return *id; -} - -std::shared_ptr ISnapshotSchema::GetFieldByColumnIdVerified(const ui32 columnId) const { - auto result = GetFieldByColumnIdOptional(columnId); - AFL_VERIFY(result)("event", 
"unknown_column")("column_id", columnId)("schema", DebugString()); - return result; -} - -} diff --git a/ydb/core/tx/columnshard/engines/scheme/abstract_scheme.h b/ydb/core/tx/columnshard/engines/scheme/abstract_scheme.h index bde8318423ee..25f6253c385b 100644 --- a/ydb/core/tx/columnshard/engines/scheme/abstract_scheme.h +++ b/ydb/core/tx/columnshard/engines/scheme/abstract_scheme.h @@ -1,73 +1,2 @@ #pragma once - -#include -#include -#include - -#include - -#include - -namespace NKikimr::NOlap { - -struct TIndexInfo; - -class ISnapshotSchema { -protected: - virtual TString DoDebugString() const = 0; -public: - using TPtr = std::shared_ptr; - - virtual ~ISnapshotSchema() {} - virtual std::shared_ptr GetColumnLoaderOptional(const ui32 columnId) const = 0; - std::shared_ptr GetColumnLoaderVerified(const ui32 columnId) const { - auto result = GetColumnLoaderOptional(columnId); - AFL_VERIFY(result); - return result; - } - std::shared_ptr GetColumnLoaderOptional(const std::string& columnName) const { - const std::optional id = GetColumnIdOptional(columnName); - if (id) { - return GetColumnLoaderOptional(*id); - } else { - return nullptr; - } - } - std::shared_ptr GetColumnLoaderVerified(const std::string& columnName) const { - auto result = GetColumnLoaderOptional(columnName); - AFL_VERIFY(result); - return result; - } - - virtual TColumnSaver GetColumnSaver(const ui32 columnId, const TSaverContext& context) const = 0; - TColumnSaver GetColumnSaver(const TString& columnName, const TSaverContext& context) const { - return GetColumnSaver(GetColumnId(columnName), context); - } - TColumnSaver GetColumnSaver(const std::string& columnName, const TSaverContext& context) const { - return GetColumnSaver(TString(columnName.data(), columnName.size()), context); - } - - virtual std::optional GetColumnIdOptional(const std::string& columnName) const = 0; - virtual int GetFieldIndex(const ui32 columnId) const = 0; - - ui32 GetColumnId(const std::string& columnName) const; - 
std::shared_ptr GetFieldByIndex(const int index) const; - std::shared_ptr GetFieldByColumnIdOptional(const ui32 columnId) const; - std::shared_ptr GetFieldByColumnIdVerified(const ui32 columnId) const; - - TString DebugString() const { - return DoDebugString(); - } - virtual const std::shared_ptr& GetSchema() const = 0; - virtual const TIndexInfo& GetIndexInfo() const = 0; - virtual const TSnapshot& GetSnapshot() const = 0; - virtual ui64 GetVersion() const = 0; - virtual ui32 GetColumnsCount() const = 0; - - std::set GetPkColumnsIds() const; - - std::shared_ptr NormalizeBatch(const ISnapshotSchema& dataSchema, const std::shared_ptr batch) const; - std::shared_ptr PrepareForInsert(const TString& data, const std::shared_ptr& dataSchema) const; -}; - -} // namespace NKikimr::NOlap +#include "versions/abstract_scheme.h" diff --git a/ydb/core/tx/columnshard/engines/scheme/column/info.cpp b/ydb/core/tx/columnshard/engines/scheme/column/info.cpp new file mode 100644 index 000000000000..7444cfa3093e --- /dev/null +++ b/ydb/core/tx/columnshard/engines/scheme/column/info.cpp @@ -0,0 +1,90 @@ +#include "info.h" +#include + +namespace NKikimr::NOlap { + +NArrow::NTransformation::ITransformer::TPtr TSimpleColumnInfo::GetSaveTransformer() const { + NArrow::NTransformation::ITransformer::TPtr transformer; + if (DictionaryEncoding) { + transformer = DictionaryEncoding->BuildEncoder(); + } + return transformer; +} + +NArrow::NTransformation::ITransformer::TPtr TSimpleColumnInfo::GetLoadTransformer() const { + NArrow::NTransformation::ITransformer::TPtr transformer; + if (DictionaryEncoding) { + transformer = DictionaryEncoding->BuildDecoder(); + } + return transformer; +} + +TConclusionStatus TSimpleColumnInfo::DeserializeFromProto(const NKikimrSchemeOp::TOlapColumnDescription& columnInfo) +{ + AFL_VERIFY(columnInfo.GetId() == ColumnId); + if (columnInfo.HasSerializer()) { + AFL_VERIFY(Serializer.DeserializeFromProto(columnInfo.GetSerializer())); + } else if 
(columnInfo.HasCompression()) { + Serializer.DeserializeFromProto(columnInfo.GetCompression()).Validate(); + } + AFL_VERIFY(Serializer); + if (columnInfo.HasDictionaryEncoding()) { + auto settings = NArrow::NDictionary::TEncodingSettings::BuildFromProto(columnInfo.GetDictionaryEncoding()); + Y_ABORT_UNLESS(settings.IsSuccess()); + DictionaryEncoding = *settings; + } + Loader = std::make_shared(GetLoadTransformer(), Serializer, ArrowSchema, ColumnId); + return TConclusionStatus::Success(); +} + +TSimpleColumnInfo::TSimpleColumnInfo(const ui32 columnId, const std::shared_ptr& arrowField, const NArrow::NSerialization::TSerializerContainer& serializer, + const bool needMinMax, const bool isSorted) + : ColumnId(columnId) + , ArrowField(arrowField) + , ArrowSchema(std::make_shared(arrow::FieldVector({arrowField}))) + , Serializer(serializer) + , NeedMinMax(needMinMax) + , IsSorted(isSorted) +{ + ColumnName = ArrowField->name(); + Loader = std::make_shared(GetLoadTransformer(), Serializer, ArrowSchema, ColumnId); +} + +std::vector> TSimpleColumnInfo::ActualizeColumnData(const std::vector>& source, const TSimpleColumnInfo& sourceColumnFeatures) const { + AFL_VERIFY(Loader); + const auto checkNeedActualize = [&]() { + if (!Serializer.IsEqualTo(sourceColumnFeatures.Serializer)) { + AFL_NOTICE(NKikimrServices::TX_COLUMNSHARD)("event", "actualization")("reason", "serializer") + ("from", sourceColumnFeatures.Serializer.SerializeToProto().DebugString()) + ("to", Serializer.SerializeToProto().DebugString()); + return true; + } + if (!Loader->IsEqualTo(*sourceColumnFeatures.Loader)) { + AFL_NOTICE(NKikimrServices::TX_COLUMNSHARD)("event", "actualization")("reason", "loader"); + return true; + } + if (!!DictionaryEncoding != !!sourceColumnFeatures.DictionaryEncoding) { + AFL_NOTICE(NKikimrServices::TX_COLUMNSHARD)("event", "actualization")("reason", "dictionary")("from", !!sourceColumnFeatures.DictionaryEncoding)("to", !!DictionaryEncoding); + return true; + } + if 
(!!DictionaryEncoding && !DictionaryEncoding->IsEqualTo(*sourceColumnFeatures.DictionaryEncoding)) { + AFL_NOTICE(NKikimrServices::TX_COLUMNSHARD)("event", "actualization")("reason", "dictionary_encoding") + ("from", sourceColumnFeatures.DictionaryEncoding->SerializeToProto().DebugString()) + ("to", DictionaryEncoding->SerializeToProto().DebugString()) + ; + return true; + } + return false; + }; + if (!checkNeedActualize()) { + return source; + } + std::vector> result; + for (auto&& s : source) { + auto data = NArrow::TStatusValidator::GetValid(sourceColumnFeatures.Loader->Apply(s->GetData())); + result.emplace_back(s->CopyWithAnotherBlob(GetColumnSaver().Apply(data), *this)); + } + return result; +} + +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/scheme/column/info.h b/ydb/core/tx/columnshard/engines/scheme/column/info.h new file mode 100644 index 000000000000..5f4eced0eae6 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/scheme/column/info.h @@ -0,0 +1,61 @@ +#pragma once +#include +#include + +#include +#include +#include +#include + +#include + +#include +#include + +namespace NKikimr::NOlap { + +class IPortionDataChunk; + +class TSimpleColumnInfo { +private: + YDB_READONLY(ui32, ColumnId, 0); + YDB_READONLY_DEF(TString, ColumnName); + YDB_READONLY_DEF(std::shared_ptr, ArrowField); + YDB_READONLY_DEF(std::shared_ptr, ArrowSchema); + YDB_READONLY(NArrow::NSerialization::TSerializerContainer, Serializer, NArrow::NSerialization::TSerializerContainer::GetDefaultSerializer()); + YDB_READONLY(bool, NeedMinMax, false); + YDB_READONLY(bool, IsSorted, false); + std::optional DictionaryEncoding; + std::shared_ptr Loader; + NArrow::NTransformation::ITransformer::TPtr GetLoadTransformer() const; + +public: + + TSimpleColumnInfo(const ui32 columnId, const std::shared_ptr& arrowField, const NArrow::NSerialization::TSerializerContainer& serializer, const bool needMinMax, const bool isSorted); + + TColumnSaver GetColumnSaver() const { + 
NArrow::NTransformation::ITransformer::TPtr transformer = GetSaveTransformer(); + AFL_VERIFY(Serializer); + return TColumnSaver(transformer, Serializer); + } + + std::vector> ActualizeColumnData(const std::vector>& source, const TSimpleColumnInfo& sourceColumnFeatures) const; + + TString DebugString() const { + TStringBuilder sb; + sb << "serializer=" << (Serializer ? Serializer->DebugString() : "NO") << ";"; + sb << "encoding=" << (DictionaryEncoding ? DictionaryEncoding->DebugString() : "NO") << ";"; + sb << "loader=" << (Loader ? Loader->DebugString() : "NO") << ";"; + return sb; + } + + NArrow::NTransformation::ITransformer::TPtr GetSaveTransformer() const; + TConclusionStatus DeserializeFromProto(const NKikimrSchemeOp::TOlapColumnDescription& columnInfo); + + const std::shared_ptr& GetLoader() const { + AFL_VERIFY(Loader); + return Loader; + } +}; + +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/scheme/column/ya.make b/ydb/core/tx/columnshard/engines/scheme/column/ya.make new file mode 100644 index 000000000000..79a17cbe405c --- /dev/null +++ b/ydb/core/tx/columnshard/engines/scheme/column/ya.make @@ -0,0 +1,18 @@ +LIBRARY() + +SRCS( + info.cpp +) + +PEERDIR( + ydb/core/tx/columnshard/engines/scheme/abstract + + ydb/core/formats/arrow/dictionary + ydb/core/formats/arrow/serializer + ydb/core/formats/arrow/transformer + ydb/core/formats/arrow/common + + contrib/libs/apache/arrow +) + +END() diff --git a/ydb/core/tx/columnshard/engines/scheme/column_features.cpp b/ydb/core/tx/columnshard/engines/scheme/column_features.cpp index 72d5059c0053..49a0e78325dc 100644 --- a/ydb/core/tx/columnshard/engines/scheme/column_features.cpp +++ b/ydb/core/tx/columnshard/engines/scheme/column_features.cpp @@ -5,69 +5,4 @@ namespace NKikimr::NOlap { -NArrow::NTransformation::ITransformer::TPtr TColumnFeatures::GetSaveTransformer() const { - NArrow::NTransformation::ITransformer::TPtr transformer; - if (DictionaryEncoding) { - transformer = 
DictionaryEncoding->BuildEncoder(); - } - return transformer; -} - -NArrow::NTransformation::ITransformer::TPtr TColumnFeatures::GetLoadTransformer() const { - NArrow::NTransformation::ITransformer::TPtr transformer; - if (DictionaryEncoding) { - transformer = DictionaryEncoding->BuildDecoder(); - } - return transformer; -} - -void TColumnFeatures::InitLoader(const TIndexInfo& info) { - auto schema = info.GetColumnSchema(ColumnId); - Loader = std::make_shared(GetLoadTransformer(), Serializer, schema, ColumnId); -} - -std::optional TColumnFeatures::BuildFromProto(const NKikimrSchemeOp::TOlapColumnDescription& columnInfo, const TIndexInfo& indexInfo) { - const ui32 columnId = columnInfo.GetId(); - TColumnFeatures result(columnId); - if (columnInfo.HasSerializer()) { - AFL_VERIFY(result.Serializer.DeserializeFromProto(columnInfo.GetSerializer())); - } else if (columnInfo.HasCompression()) { - AFL_VERIFY(result.Serializer.DeserializeFromProto(columnInfo.GetCompression())); - } - if (columnInfo.HasDictionaryEncoding()) { - auto settings = NArrow::NDictionary::TEncodingSettings::BuildFromProto(columnInfo.GetDictionaryEncoding()); - Y_ABORT_UNLESS(settings.IsSuccess()); - result.DictionaryEncoding = *settings; - } - result.InitLoader(indexInfo); - return result; -} - -NKikimr::NOlap::TColumnFeatures TColumnFeatures::BuildFromIndexInfo(const ui32 columnId, const TIndexInfo& indexInfo) { - TColumnFeatures result(columnId); - result.InitLoader(indexInfo); - return result; -} - -TColumnFeatures::TColumnFeatures(const ui32 columnId) - : ColumnId(columnId) - , Serializer(NArrow::NSerialization::TSerializerContainer::GetDefaultSerializer()) -{ - -} - -TString TColumnLoader::DebugString() const { - TStringBuilder result; - if (ExpectedSchema) { - result << "schema:" << ExpectedSchema->ToString() << ";"; - } - if (Transformer) { - result << "transformer:" << Transformer->DebugString() << ";"; - } - if (Serializer) { - result << "serializer:" << Serializer->DebugString() << ";"; - 
} - return result; -} - } // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/scheme/column_features.h b/ydb/core/tx/columnshard/engines/scheme/column_features.h index 7e99af80eb72..7f8f5017559d 100644 --- a/ydb/core/tx/columnshard/engines/scheme/column_features.h +++ b/ydb/core/tx/columnshard/engines/scheme/column_features.h @@ -1,165 +1,53 @@ #pragma once +#include "abstract/loader.h" +#include "abstract/saver.h" +#include "column/info.h" + #include #include #include #include #include +#include +#include + #include #include -#include namespace NKikimr::NOlap { class TSaverContext { private: - TString TierName; - std::optional ExternalSerializer; - YDB_READONLY_DEF(std::shared_ptr, StorageOperator); YDB_READONLY_DEF(std::shared_ptr, StoragesManager); public: - TSaverContext(const std::shared_ptr& storageOperator, const std::shared_ptr& storagesManager) - : StorageOperator(storageOperator) - , StoragesManager(storagesManager) { - - } - - const std::optional& GetExternalSerializer() const { - return ExternalSerializer; - } - TSaverContext& SetExternalSerializer(const std::optional& value) { - AFL_VERIFY(!!value); - ExternalSerializer = value; - return *this; - } - const TString& GetTierName() const { - return TierName; - } - TSaverContext& SetTierName(const TString& value) { - TierName = value; - return *this; - } -}; - -class TColumnSaver { -private: - NArrow::NTransformation::ITransformer::TPtr Transformer; - NArrow::NSerialization::TSerializerContainer Serializer; -public: - TColumnSaver() = default; - TColumnSaver(NArrow::NTransformation::ITransformer::TPtr transformer, const NArrow::NSerialization::TSerializerContainer serializer) - : Transformer(transformer) - , Serializer(serializer) { - Y_ABORT_UNLESS(Serializer); - } - - bool IsHardPacker() const { - return Serializer->IsHardPacker(); - } - - TString Apply(std::shared_ptr data, std::shared_ptr field) const { - auto schema = std::make_shared(arrow::FieldVector{field}); - auto batch = 
arrow::RecordBatch::Make(schema, data->length(), {data}); - return Apply(batch); - } - - TString Apply(const std::shared_ptr& data) const { - Y_ABORT_UNLESS(Serializer); - if (Transformer) { - return Serializer->SerializeFull(Transformer->Transform(data)); - } else { - return Serializer->SerializePayload(data); - } - } -}; - -class TColumnLoader { -private: - NArrow::NTransformation::ITransformer::TPtr Transformer; - NArrow::NSerialization::TSerializerContainer Serializer; - std::shared_ptr ExpectedSchema; - const ui32 ColumnId; -public: - TString DebugString() const; - - TColumnLoader(NArrow::NTransformation::ITransformer::TPtr transformer, const NArrow::NSerialization::TSerializerContainer& serializer, - const std::shared_ptr& expectedSchema, const ui32 columnId) - : Transformer(transformer) - , Serializer(serializer) - , ExpectedSchema(expectedSchema) - , ColumnId(columnId) { - Y_ABORT_UNLESS(ExpectedSchema); - auto fieldsCountStr = ::ToString(ExpectedSchema->num_fields()); - Y_ABORT_UNLESS(ExpectedSchema->num_fields() == 1, "%s", fieldsCountStr.data()); - Y_ABORT_UNLESS(Serializer); - } - - ui32 GetColumnId() const { - return ColumnId; - } - - const std::shared_ptr& GetField() const { - return ExpectedSchema->field(0); - } - - const std::shared_ptr& GetExpectedSchema() const { - return ExpectedSchema; - } - - arrow::Result> Apply(const TString& data) const { - Y_ABORT_UNLESS(Serializer); - arrow::Result> columnArray = - Transformer ? 
Serializer->Deserialize(data) : Serializer->Deserialize(data, ExpectedSchema); - if (!columnArray.ok()) { - return columnArray; - } - if (Transformer) { - return Transformer->Transform(*columnArray); - } else { - return columnArray; - } - } - - std::shared_ptr ApplyVerified(const TString& data) const { - return NArrow::TStatusValidator::GetValid(Apply(data)); - } - - std::shared_ptr ApplyVerifiedColumn(const TString& data) const { - auto rb = ApplyVerified(data); - AFL_VERIFY(rb->num_columns() == 1)("schema", rb->schema()->ToString()); - return rb->column(0); + TSaverContext(const std::shared_ptr& storagesManager) + : StoragesManager(storagesManager) { + AFL_VERIFY(StoragesManager); } }; struct TIndexInfo; -class TColumnFeatures { +class TColumnFeatures: public TSimpleColumnInfo { private: - ui32 ColumnId; - YDB_READONLY_DEF(NArrow::NSerialization::TSerializerContainer, Serializer); - std::optional DictionaryEncoding; - std::shared_ptr Loader; - - NArrow::NTransformation::ITransformer::TPtr GetLoadTransformer() const; - - void InitLoader(const TIndexInfo& info); - TColumnFeatures(const ui32 columnId); + using TBase = TSimpleColumnInfo; + YDB_READONLY_DEF(std::shared_ptr, Operator); public: - - TString DebugString() const { - TStringBuilder sb; - sb << "serializer=" << (Serializer ? Serializer->DebugString() : "NO") << ";"; - sb << "encoding=" << (DictionaryEncoding ? DictionaryEncoding->DebugString() : "NO") << ";"; - sb << "loader=" << (Loader ? 
Loader->DebugString() : "NO") << ";"; - return sb; - } - - NArrow::NTransformation::ITransformer::TPtr GetSaveTransformer() const; - static std::optional BuildFromProto(const NKikimrSchemeOp::TOlapColumnDescription& columnInfo, const TIndexInfo& indexInfo); - static TColumnFeatures BuildFromIndexInfo(const ui32 columnId, const TIndexInfo& indexInfo); - - const std::shared_ptr& GetLoader() const { - AFL_VERIFY(Loader); - return Loader; + TColumnFeatures(const ui32 columnId, const std::shared_ptr& arrowField, const NArrow::NSerialization::TSerializerContainer& serializer, + const std::shared_ptr& bOperator, const bool needMinMax, const bool isSorted) + : TBase(columnId, arrowField, serializer, needMinMax, isSorted) + , Operator(bOperator) + { + AFL_VERIFY(Operator); + + } + TConclusionStatus DeserializeFromProto(const NKikimrSchemeOp::TOlapColumnDescription& columnInfo, const std::shared_ptr& storagesManager) { + auto parsed = TBase::DeserializeFromProto(columnInfo); + if (!parsed) { + return parsed; + } + Operator = storagesManager->GetOperatorVerified(columnInfo.GetStorageId() ? 
columnInfo.GetStorageId() : IStoragesManager::DefaultStorageId); + return TConclusionStatus::Success(); } }; diff --git a/ydb/core/tx/columnshard/engines/scheme/filtered_scheme.cpp b/ydb/core/tx/columnshard/engines/scheme/filtered_scheme.cpp index 52ad20e0da28..30eba88ffdd8 100644 --- a/ydb/core/tx/columnshard/engines/scheme/filtered_scheme.cpp +++ b/ydb/core/tx/columnshard/engines/scheme/filtered_scheme.cpp @@ -1,94 +1 @@ #include "filtered_scheme.h" -#include - - -namespace NKikimr::NOlap { - -TFilteredSnapshotSchema::TFilteredSnapshotSchema(ISnapshotSchema::TPtr originalSnapshot, const std::vector& columnIds) - : TFilteredSnapshotSchema(originalSnapshot, std::set(columnIds.begin(), columnIds.end())) -{} - -TFilteredSnapshotSchema::TFilteredSnapshotSchema(ISnapshotSchema::TPtr originalSnapshot, const std::set& columnIds) - : OriginalSnapshot(originalSnapshot) - , ColumnIds(columnIds) -{ - std::vector> schemaFields; - for (auto&& i : OriginalSnapshot->GetSchema()->fields()) { - if (!ColumnIds.contains(OriginalSnapshot->GetIndexInfo().GetColumnId(i->name()))) { - continue; - } - schemaFields.emplace_back(i); - } - Schema = std::make_shared(schemaFields); -} - -TFilteredSnapshotSchema::TFilteredSnapshotSchema(ISnapshotSchema::TPtr originalSnapshot, const std::set& columnNames) - : OriginalSnapshot(originalSnapshot) { - for (auto&& i : columnNames) { - ColumnIds.emplace(OriginalSnapshot->GetColumnId(i)); - } - std::vector> schemaFields; - for (auto&& i : OriginalSnapshot->GetSchema()->fields()) { - if (!columnNames.contains(i->name())) { - continue; - } - schemaFields.emplace_back(i); - } - Schema = std::make_shared(schemaFields); -} - -TColumnSaver TFilteredSnapshotSchema::GetColumnSaver(const ui32 columnId, const TSaverContext& context) const { - Y_ABORT_UNLESS(ColumnIds.contains(columnId)); - return OriginalSnapshot->GetColumnSaver(columnId, context); -} - -std::shared_ptr TFilteredSnapshotSchema::GetColumnLoaderOptional(const ui32 columnId) const { - 
Y_ABORT_UNLESS(ColumnIds.contains(columnId)); - return OriginalSnapshot->GetColumnLoaderOptional(columnId); -} - -std::optional TFilteredSnapshotSchema::GetColumnIdOptional(const std::string& columnName) const { - return OriginalSnapshot->GetColumnIdOptional(columnName); -} - -int TFilteredSnapshotSchema::GetFieldIndex(const ui32 columnId) const { - if (!ColumnIds.contains(columnId)) { - return -1; - } - TString columnName = OriginalSnapshot->GetIndexInfo().GetColumnName(columnId, false); - if (!columnName) { - return -1; - } - std::string name(columnName.data(), columnName.size()); - return Schema->GetFieldIndex(name); -} - -const std::shared_ptr& TFilteredSnapshotSchema::GetSchema() const { - return Schema; -} - -const TIndexInfo& TFilteredSnapshotSchema::GetIndexInfo() const { - return OriginalSnapshot->GetIndexInfo(); -} - -const TSnapshot& TFilteredSnapshotSchema::GetSnapshot() const { - return OriginalSnapshot->GetSnapshot(); -} - -ui32 TFilteredSnapshotSchema::GetColumnsCount() const { - return Schema->num_fields(); -} - -ui64 TFilteredSnapshotSchema::GetVersion() const { - return OriginalSnapshot->GetIndexInfo().GetVersion(); -} - -TString TFilteredSnapshotSchema::DoDebugString() const { - return TStringBuilder() << "(" - << "original=" << OriginalSnapshot->DebugString() << ";" - << "column_ids=[" << JoinSeq(",", ColumnIds) << "];" - << ")" - ; -} - -} diff --git a/ydb/core/tx/columnshard/engines/scheme/filtered_scheme.h b/ydb/core/tx/columnshard/engines/scheme/filtered_scheme.h index 2e444b26d53f..e0f0bdb537af 100644 --- a/ydb/core/tx/columnshard/engines/scheme/filtered_scheme.h +++ b/ydb/core/tx/columnshard/engines/scheme/filtered_scheme.h @@ -1,32 +1,2 @@ #pragma once - -#include "abstract_scheme.h" - -#include - -namespace NKikimr::NOlap { - -class TFilteredSnapshotSchema: public ISnapshotSchema { - ISnapshotSchema::TPtr OriginalSnapshot; - std::shared_ptr Schema; - std::set ColumnIds; -protected: - virtual TString DoDebugString() const override; 
-public: - TFilteredSnapshotSchema(ISnapshotSchema::TPtr originalSnapshot, const std::vector& columnIds); - TFilteredSnapshotSchema(ISnapshotSchema::TPtr originalSnapshot, const std::set& columnIds); - TFilteredSnapshotSchema(ISnapshotSchema::TPtr originalSnapshot, const std::set& columnNames); - - TColumnSaver GetColumnSaver(const ui32 columnId, const TSaverContext& context) const override; - std::shared_ptr GetColumnLoaderOptional(const ui32 columnId) const override; - std::optional GetColumnIdOptional(const std::string& columnName) const override; - int GetFieldIndex(const ui32 columnId) const override; - - const std::shared_ptr& GetSchema() const override; - const TIndexInfo& GetIndexInfo() const override; - const TSnapshot& GetSnapshot() const override; - ui32 GetColumnsCount() const override; - ui64 GetVersion() const override; -}; - -} +#include "versions/filtered_scheme.h" diff --git a/ydb/core/tx/columnshard/engines/scheme/index_info.cpp b/ydb/core/tx/columnshard/engines/scheme/index_info.cpp index e60785485be2..c91f2d0d1370 100644 --- a/ydb/core/tx/columnshard/engines/scheme/index_info.cpp +++ b/ydb/core/tx/columnshard/engines/scheme/index_info.cpp @@ -1,17 +1,16 @@ #include "index_info.h" +#include "statistics/abstract/operator.h" +#include + +#include #include -#include -#include #include #include -#include +#include namespace NKikimr::NOlap { -const TString TIndexInfo::STORE_INDEX_STATS_TABLE = TString("/") + NSysView::SysPathName + "/" + NSysView::StorePrimaryIndexStatsName; -const TString TIndexInfo::TABLE_INDEX_STATS_TABLE = TString("/") + NSysView::SysPathName + "/" + NSysView::TablePrimaryIndexStatsName; - static std::vector NamesOnly(const std::vector& columns) { std::vector out; out.reserve(columns.size()); @@ -48,6 +47,10 @@ std::shared_ptr TIndexInfo::AddSpecialColumns(const std::sha return *res; } +ui64 TIndexInfo::GetSpecialColumnsRecordSize() { + return sizeof(ui64) + sizeof(ui64); +} + std::shared_ptr TIndexInfo::ArrowSchemaSnapshot() { 
static std::shared_ptr result = std::make_shared(arrow::FieldVector{ arrow::field(SPEC_COL_PLAN_STEP, arrow::uint64()), @@ -231,7 +234,7 @@ std::shared_ptr TIndexInfo::ArrowColumnFieldOptional(const ui32 co } } -void TIndexInfo::SetAllKeys() { +void TIndexInfo::SetAllKeys(const std::shared_ptr& operators) { /// @note Setting replace and sorting key to PK we are able to: /// * apply REPLACE by MergeSort /// * apply PK predicate before REPLACE @@ -256,69 +259,14 @@ void TIndexInfo::SetAllKeys() { MinMaxIdxColumnsIds.insert(GetPKFirstColumnId()); if (!Schema) { AFL_VERIFY(!SchemaWithSpecials); - InitializeCaches(); - } -} - -std::shared_ptr TIndexInfo::SortDescription() const { - if (GetPrimaryKey()) { - auto key = ExtendedKey; // Sort with extended key, greater snapshot first - Y_ABORT_UNLESS(key && key->num_fields() > 2); - auto description = std::make_shared(key); - description->Directions[key->num_fields() - 1] = -1; - description->Directions[key->num_fields() - 2] = -1; - description->NotNull = true; // TODO - return description; - } - return {}; -} - -std::shared_ptr TIndexInfo::SortReplaceDescription() const { - if (GetPrimaryKey()) { - auto key = ExtendedKey; // Sort with extended key, greater snapshot first - Y_ABORT_UNLESS(key && key->num_fields() > 2); - auto description = std::make_shared(key, GetPrimaryKey()); - description->Directions[key->num_fields() - 1] = -1; - description->Directions[key->num_fields() - 2] = -1; - description->NotNull = true; // TODO - return description; - } - return {}; -} - -bool TIndexInfo::AllowTtlOverColumn(const TString& name) const { - auto it = ColumnNames.find(name); - if (it == ColumnNames.end()) { - return false; - } - return MinMaxIdxColumnsIds.contains(it->second); -} - -TColumnSaver TIndexInfo::GetColumnSaver(const ui32 columnId, const TSaverContext& context) const { - NArrow::NTransformation::ITransformer::TPtr transformer; - NArrow::NSerialization::TSerializerContainer serializer; - { - auto it = 
ColumnFeatures.find(columnId); - AFL_VERIFY(it != ColumnFeatures.end()); - transformer = it->second.GetSaveTransformer(); - serializer = it->second.GetSerializer(); - } - - if (!!context.GetExternalSerializer()) { - return TColumnSaver(transformer, *context.GetExternalSerializer()); - } else if (!!serializer) { - return TColumnSaver(transformer, serializer); - } else if (DefaultSerializer) { - return TColumnSaver(transformer, DefaultSerializer); - } else { - return TColumnSaver(transformer, NArrow::NSerialization::TSerializerContainer::GetDefaultSerializer()); + InitializeCaches(operators); } } -std::shared_ptr TIndexInfo::GetColumnLoaderVerified(const ui32 columnId) const { - auto result = GetColumnLoaderOptional(columnId); - AFL_VERIFY(result); - return result; +TColumnSaver TIndexInfo::GetColumnSaver(const ui32 columnId) const { + auto it = ColumnFeatures.find(columnId); + AFL_VERIFY(it != ColumnFeatures.end()); + return it->second.GetColumnSaver(); } std::shared_ptr TIndexInfo::GetColumnLoaderOptional(const ui32 columnId) const { @@ -364,18 +312,43 @@ std::shared_ptr TIndexInfo::GetColumnSchema(const ui32 columnId) return GetColumnsSchema({columnId}); } -bool TIndexInfo::DeserializeFromProto(const NKikimrSchemeOp::TColumnTableSchema& schema) { +bool TIndexInfo::DeserializeFromProto(const NKikimrSchemeOp::TColumnTableSchema& schema, const std::shared_ptr& operators) { if (schema.GetEngine() != NKikimrSchemeOp::COLUMN_ENGINE_REPLACING_TIMESERIES) { AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("event", "cannot_parse_index_info")("reason", "incorrect_engine_in_schema"); return false; } + { + SchemeNeedActualization = schema.GetOptions().GetSchemeNeedActualization(); + } + + if (schema.HasDefaultCompression()) { + NArrow::NSerialization::TSerializerContainer container; + if (!container.DeserializeFromProto(schema.GetDefaultCompression())) { + AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("event", "cannot_parse_index_info")("reason", "cannot_parse_default_serializer"); + 
return false; + } + DefaultSerializer = container; + } + + { + for (const auto& stat : schema.GetStatistics()) { + NStatistics::TOperatorContainer container; + AFL_VERIFY(container.DeserializeFromProto(stat)); + AFL_VERIFY(StatisticsByName.emplace(container.GetName(), std::move(container)).second); + } + NStatistics::TPortionStorageCursor cursor; + for (auto&& [_, container] : StatisticsByName) { + container.SetCursor(cursor); + container->ShiftCursor(cursor); + } + } + for (const auto& idx : schema.GetIndexes()) { NIndexes::TIndexMetaContainer meta; AFL_VERIFY(meta.DeserializeFromProto(idx)); Indexes.emplace(meta->GetIndexId(), meta); } - for (const auto& col : schema.GetColumns()) { const ui32 id = col.GetId(); const TString& name = col.GetName(); @@ -384,31 +357,22 @@ bool TIndexInfo::DeserializeFromProto(const NKikimrSchemeOp::TColumnTableSchema& Columns[id] = NTable::TColumn(name, id, typeInfoMod.TypeInfo, typeInfoMod.TypeMod, notNull); ColumnNames[name] = id; } - InitializeCaches(); - for (const auto& col : schema.GetColumns()) { - std::optional cFeatures = TColumnFeatures::BuildFromProto(col, *this); - if (!cFeatures) { - AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("event", "cannot_parse_column_feature"); - return false; - } - auto it = ColumnFeatures.find(col.GetId()); - AFL_VERIFY(it != ColumnFeatures.end()); - it->second = *cFeatures; - } - for (const auto& keyName : schema.GetKeyColumnNames()) { Y_ABORT_UNLESS(ColumnNames.contains(keyName)); KeyColumns.push_back(ColumnNames[keyName]); } - - if (schema.HasDefaultCompression()) { - NArrow::NSerialization::TSerializerContainer container; - if (!container.DeserializeFromProto(schema.GetDefaultCompression())) { - AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("event", "cannot_parse_index_info")("reason", "cannot_parse_default_serializer"); + InitializeCaches(operators); + for (const auto& col : schema.GetColumns()) { + auto it = ColumnFeatures.find(col.GetId()); + AFL_VERIFY(it != ColumnFeatures.end()); + auto 
parsed = it->second.DeserializeFromProto(col, operators); + if (!parsed) { + AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("event", "cannot_parse_column_feature")("reason", parsed.GetErrorMessage()); return false; } - DefaultSerializer = container; } + + Version = schema.GetVersion(); return true; } @@ -433,7 +397,9 @@ std::shared_ptr MakeArrowSchema(const NTable::TScheme::TTableSche const auto& column = it->second; std::string colName(column.Name.data(), column.Name.size()); - fields.emplace_back(std::make_shared(colName, NArrow::GetArrowType(column.PType), !column.NotNull)); + auto arrowType = NArrow::GetArrowType(column.PType); + AFL_VERIFY(arrowType.ok()); + fields.emplace_back(std::make_shared(colName, arrowType.ValueUnsafe(), !column.NotNull)); } return std::make_shared(std::move(fields)); @@ -450,9 +416,9 @@ std::vector GetColumns(const NTable::TScheme::TTableSchema& table return out; } -std::optional TIndexInfo::BuildFromProto(const NKikimrSchemeOp::TColumnTableSchema& schema) { +std::optional TIndexInfo::BuildFromProto(const NKikimrSchemeOp::TColumnTableSchema& schema, const std::shared_ptr& operators) { TIndexInfo result(""); - if (!result.DeserializeFromProto(schema)) { + if (!result.DeserializeFromProto(schema, operators)) { return std::nullopt; } return result; @@ -462,18 +428,46 @@ std::shared_ptr TIndexInfo::SpecialColumnField(const ui32 columnId return ArrowSchemaSnapshot()->GetFieldByName(GetColumnName(columnId, true)); } -void TIndexInfo::InitializeCaches() { +void TIndexInfo::InitializeCaches(const std::shared_ptr& operators) { BuildArrowSchema(); BuildSchemaWithSpecials(); for (auto&& c : Columns) { AFL_VERIFY(ArrowColumnByColumnIdCache.emplace(c.first, GetColumnFieldVerified(c.first)).second); - AFL_VERIFY(ColumnFeatures.emplace(c.first, TColumnFeatures::BuildFromIndexInfo(c.first, *this)).second); + AFL_VERIFY(ColumnFeatures.emplace(c.first, TColumnFeatures(c.first, GetColumnFieldVerified(c.first), DefaultSerializer, 
operators->GetDefaultOperator(), + NArrow::IsPrimitiveYqlType(c.second.PType), c.first == GetPKFirstColumnId())).second); } for (auto&& cId : GetSpecialColumnIds()) { AFL_VERIFY(ArrowColumnByColumnIdCache.emplace(cId, GetColumnFieldVerified(cId)).second); - AFL_VERIFY(ColumnFeatures.emplace(cId, TColumnFeatures::BuildFromIndexInfo(cId, *this)).second); + AFL_VERIFY(ColumnFeatures.emplace(cId, TColumnFeatures(cId, GetColumnFieldVerified(cId), DefaultSerializer, operators->GetDefaultOperator(), false, false)).second); + } +} + +std::vector> TIndexInfo::MakeEmptyChunks(const ui32 columnId, const std::vector& pages, const TSimpleColumnInfo& columnInfo) const { + std::vector> result; + auto columnArrowSchema = GetColumnSchema(columnId); + TColumnSaver saver = GetColumnSaver(columnId); + ui32 idx = 0; + for (auto p : pages) { + auto arr = NArrow::MakeEmptyBatch(columnArrowSchema, p); + AFL_VERIFY(arr->num_columns() == 1)("count", arr->num_columns()); + result.emplace_back(std::make_shared(saver.Apply(arr), arr->column(0), TChunkAddress(columnId, idx), columnInfo)); + ++idx; + } + return result; +} + +NSplitter::TEntityGroups TIndexInfo::GetEntityGroupsByStorageId(const TString& specialTier, const IStoragesManager& storages) const { + NSplitter::TEntityGroups groups(storages.GetDefaultOperator()->GetBlobSplitSettings(), IStoragesManager::DefaultStorageId); + for (auto&& i : GetEntityIds()) { + auto storageId = GetEntityStorageId(i, specialTier); + auto* group = groups.GetGroupOptional(storageId); + if (!group) { + group = &groups.RegisterGroup(storageId, storages.GetOperatorVerified(storageId)->GetBlobSplitSettings()); + } + group->AddEntity(i); } + return groups; } } // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/scheme/index_info.h b/ydb/core/tx/columnshard/engines/scheme/index_info.h index 7409e33bd910..a5c63814256e 100644 --- a/ydb/core/tx/columnshard/engines/scheme/index_info.h +++ b/ydb/core/tx/columnshard/engines/scheme/index_info.h @@ 
-3,6 +3,11 @@ #include "column_features.h" #include "tier_info.h" +#include "abstract/index_info.h" +#include "indexes/abstract/meta.h" +#include "statistics/abstract/operator.h" +#include "statistics/abstract/common.h" + #include #include @@ -12,7 +17,6 @@ #include #include #include -#include "indexes/abstract/meta.h" namespace arrow { class Array; @@ -20,43 +24,96 @@ namespace arrow { class Schema; } -namespace NKikimr::NArrow { - struct TSortDescription; -} - namespace NKikimr::NOlap { +class TPortionInfoWithBlobs; struct TInsertedData; class TSnapshotColumnInfo; +class ISnapshotSchema; using TNameTypeInfo = std::pair; /// Column engine index description in terms of tablet's local table. /// We have to use YDB types for keys here. -struct TIndexInfo : public NTable::TScheme::TTableSchema { +struct TIndexInfo : public NTable::TScheme::TTableSchema, public IIndexInfo { private: THashMap ColumnFeatures; THashMap> ArrowColumnByColumnIdCache; THashMap Indexes; + std::map StatisticsByName; TIndexInfo(const TString& name); - bool DeserializeFromProto(const NKikimrSchemeOp::TColumnTableSchema& schema); + bool SchemeNeedActualization = false; + bool DeserializeFromProto(const NKikimrSchemeOp::TColumnTableSchema& schema, const std::shared_ptr& operators); TColumnFeatures& GetOrCreateColumnFeatures(const ui32 columnId) const; void BuildSchemaWithSpecials(); void BuildArrowSchema(); - void InitializeCaches(); + void InitializeCaches(const std::shared_ptr& operators); public: - static constexpr const char* SPEC_COL_PLAN_STEP = NOlap::NPortion::TSpecialColumns::SPEC_COL_PLAN_STEP; - static constexpr const char* SPEC_COL_TX_ID = NOlap::NPortion::TSpecialColumns::SPEC_COL_TX_ID; - static const TString STORE_INDEX_STATS_TABLE; - static const TString TABLE_INDEX_STATS_TABLE; + const TColumnFeatures& GetColumnFeaturesVerified(const ui32 columnId) const { + auto it = ColumnFeatures.find(columnId); + AFL_VERIFY(it != ColumnFeatures.end()); + return it->second; + } + + 
NSplitter::TEntityGroups GetEntityGroupsByStorageId(const TString& specialTier, const IStoragesManager& storages) const; + + bool GetSchemeNeedActualization() const { + return SchemeNeedActualization; + } + + std::set GetUsedStorageIds(const TString& portionTierName) const { + std::set result; + if (portionTierName && portionTierName != IStoragesManager::DefaultStorageId) { + result.emplace(portionTierName); + } else { + for (auto&& i : ColumnFeatures) { + result.emplace(i.second.GetOperator()->GetStorageId()); + } + } + return result; + } + + std::vector> MakeEmptyChunks(const ui32 columnId, const std::vector& pages, const TSimpleColumnInfo& columnInfo) const; + + const std::map& GetStatisticsByName() const { + return StatisticsByName; + } + + NStatistics::TOperatorContainer GetStatistics(const NStatistics::TIdentifier& id) const { + for (auto&& i : StatisticsByName) { + if (i.second->GetIdentifier() == id) { + return i.second; + } + } + return NStatistics::TOperatorContainer(); + } const THashMap& GetIndexes() const { return Indexes; } - enum class ESpecialColumn : ui32 { - PLAN_STEP = NOlap::NPortion::TSpecialColumns::SPEC_COL_PLAN_STEP_INDEX, - TX_ID = NOlap::NPortion::TSpecialColumns::SPEC_COL_TX_ID_INDEX - }; + const TString& GetIndexStorageId(const ui32 indexId) const { + auto it = Indexes.find(indexId); + AFL_VERIFY(it != Indexes.end()); + return it->second->GetStorageId(); + } + + const TString& GetColumnStorageId(const ui32 columnId, const TString& specialTier) const { + if (specialTier && specialTier != IStoragesManager::DefaultStorageId) { + return specialTier; + } else { + auto it = ColumnFeatures.find(columnId); + AFL_VERIFY(it != ColumnFeatures.end()); + return it->second.GetOperator()->GetStorageId(); + } + } + + const TString& GetEntityStorageId(const ui32 entityId, const TString& specialTier) const { + auto it = Indexes.find(entityId); + if (it != Indexes.end()) { + return it->second->GetStorageId(); + } + return GetColumnStorageId(entityId, 
specialTier); + } TString DebugString() const { TStringBuilder sb; @@ -77,6 +134,7 @@ struct TIndexInfo : public NTable::TScheme::TTableSchema { /// Makes schema as set of the special columns. static std::shared_ptr ArrowSchemaSnapshot(); + static ui64 GetSpecialColumnsRecordSize(); /// Matches name of the filed with names of the special columns. static bool IsSpecialColumn(const arrow::Field& field); @@ -102,20 +160,35 @@ struct TIndexInfo : public NTable::TScheme::TTableSchema { return result; } - static std::optional BuildFromProto(const NKikimrSchemeOp::TColumnTableSchema& schema); + std::vector> ActualizeColumnData(const std::vector>& source, const TIndexInfo& sourceIndexInfo, const ui32 columnId) const { + auto itCurrent = ColumnFeatures.find(columnId); + auto itPred = sourceIndexInfo.ColumnFeatures.find(columnId); + AFL_VERIFY(itCurrent != ColumnFeatures.end()); + AFL_VERIFY(itPred != sourceIndexInfo.ColumnFeatures.end()); + return itCurrent->second.ActualizeColumnData(source, itPred->second); + } + + static std::optional BuildFromProto(const NKikimrSchemeOp::TColumnTableSchema& schema, const std::shared_ptr& operators); static const std::vector& SnapshotColumnNames() { static std::vector result = {SPEC_COL_PLAN_STEP, SPEC_COL_TX_ID}; return result; } + bool HasColumnId(const ui32 columnId) const { + return ColumnFeatures.contains(columnId); + } + + bool HasIndexId(const ui32 indexId) const { + return Indexes.contains(indexId); + } + std::shared_ptr GetColumnFieldOptional(const ui32 columnId) const; std::shared_ptr GetColumnFieldVerified(const ui32 columnId) const; std::shared_ptr GetColumnSchema(const ui32 columnId) const; std::shared_ptr GetColumnsSchema(const std::set& columnIds) const; - TColumnSaver GetColumnSaver(const ui32 columnId, const TSaverContext& context) const; - std::shared_ptr GetColumnLoaderOptional(const ui32 columnId) const; - std::shared_ptr GetColumnLoaderVerified(const ui32 columnId) const; + TColumnSaver GetColumnSaver(const ui32 
columnId) const; + virtual std::shared_ptr GetColumnLoaderOptional(const ui32 columnId) const override; std::optional GetColumnNameOptional(const ui32 columnId) const { auto f = GetColumnFieldOptional(columnId); if (!f) { @@ -140,13 +213,20 @@ struct TIndexInfo : public NTable::TScheme::TTableSchema { return meta->GetIndexName(); } - void AppendIndexes(std::map>>& originalData) const { + void AppendIndexes(THashMap>>& originalData) const { for (auto&& i : Indexes) { std::shared_ptr chunk = i.second->BuildIndex(i.first, originalData, *this); AFL_VERIFY(originalData.emplace(i.first, std::vector>({chunk})).second); } } + void AppendIndex(THashMap>>& originalData, const ui32 indexId) const { + auto it = Indexes.find(indexId); + AFL_VERIFY(it != Indexes.end()); + std::shared_ptr chunk = it->second->BuildIndex(indexId, originalData, *this); + AFL_VERIFY(originalData.emplace(indexId, std::vector>({chunk})).second); + } + /// Returns an id of the column located by name. The name should exists in the schema. ui32 GetColumnId(const std::string& name) const; std::optional GetColumnIdOptional(const std::string& name) const; @@ -157,6 +237,13 @@ struct TIndexInfo : public NTable::TScheme::TTableSchema { /// Returns names of columns defined by the specific ids. std::vector GetColumnNames(const std::vector& ids) const; std::vector GetColumnIds() const; + std::vector GetEntityIds() const { + auto result = GetColumnIds(); + for (auto&& i : Indexes) { + result.emplace_back(i.first); + } + return result; + } /// Returns info of columns defined by specific ids. std::vector GetColumns(const std::vector& ids) const; @@ -179,7 +266,7 @@ struct TIndexInfo : public NTable::TScheme::TTableSchema { const std::shared_ptr& GetPrimaryKey() const { return PrimaryKey; } /// Initializes sorting, replace, index and extended keys. 
- void SetAllKeys(); + void SetAllKeys(const std::shared_ptr& operators); void CheckTtlColumn(const TString& ttlColumn) const { Y_ABORT_UNLESS(!ttlColumn.empty()); @@ -213,19 +300,6 @@ struct TIndexInfo : public NTable::TScheme::TTableSchema { bool IsSorted() const { return true; } bool IsSortedColumn(const ui32 columnId) const { return GetPKFirstColumnId() == columnId; } - std::shared_ptr SortDescription() const; - std::shared_ptr SortReplaceDescription() const; - - static const std::vector& GetSpecialColumnNames() { - static const std::vector result = { std::string(SPEC_COL_PLAN_STEP), std::string(SPEC_COL_TX_ID) }; - return result; - } - - static const std::vector& GetSpecialColumnIds() { - static const std::vector result = {(ui32)ESpecialColumn::PLAN_STEP, (ui32)ESpecialColumn::TX_ID}; - return result; - } - static const std::set& GetSpecialColumnIdsSet() { static const std::set result(GetSpecialColumnIds().begin(), GetSpecialColumnIds().end()); return result; @@ -236,6 +310,9 @@ struct TIndexInfo : public NTable::TScheme::TTableSchema { } bool CheckCompatible(const TIndexInfo& other) const; + NArrow::NSerialization::TSerializerContainer GetDefaultSerializer() const { + return DefaultSerializer; + } private: ui64 Version = 0; @@ -246,7 +323,7 @@ struct TIndexInfo : public NTable::TScheme::TTableSchema { std::shared_ptr ExtendedKey; // Extend PK with snapshot columns to allow old shapshot reads THashSet RequiredColumns; THashSet MinMaxIdxColumnsIds; - NArrow::NSerialization::TSerializerContainer DefaultSerializer; + NArrow::NSerialization::TSerializerContainer DefaultSerializer = NArrow::NSerialization::TSerializerContainer::GetDefaultSerializer(); }; std::shared_ptr MakeArrowSchema(const NTable::TScheme::TTableSchema::TColumns& columns, const std::vector& ids, bool withSpecials = false); diff --git a/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/meta.cpp b/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/meta.cpp index 
cf87cf941d8f..f645bd4a308b 100644 --- a/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/meta.cpp +++ b/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/meta.cpp @@ -1,7 +1,9 @@ #include "meta.h" +#include #include #include #include +#include #include #include #include @@ -9,11 +11,12 @@ namespace NKikimr::NOlap::NIndexes { -void TPortionIndexChunk::DoAddIntoPortion(const TBlobRange& bRange, TPortionInfo& portionInfo) const { - portionInfo.AddIndex(TIndexChunk(GetEntityId(), GetChunkIdx(), RecordsCount, RawBytes, bRange)); +void TPortionIndexChunk::DoAddIntoPortionBeforeBlob(const TBlobRangeLink16& bRange, TPortionInfo& portionInfo) const { + AFL_VERIFY(!bRange.IsValid()); + portionInfo.AddIndex(TIndexChunk(GetEntityId(), GetChunkIdxVerified(), RecordsCount, RawBytes, bRange)); } -std::shared_ptr TIndexByColumns::DoBuildIndex(const ui32 indexId, std::map>>& data, const TIndexInfo& indexInfo) const { +std::shared_ptr TIndexByColumns::DoBuildIndex(const ui32 indexId, THashMap>>& data, const TIndexInfo& indexInfo) const { AFL_VERIFY(Serializer); AFL_VERIFY(data.size()); std::vector columnReaders; @@ -29,7 +32,7 @@ std::shared_ptr TIndexByColumns::DoBuildIndex TChunkedBatchReader reader(std::move(columnReaders)); std::shared_ptr indexBatch = DoBuildIndexImpl(reader); const TString indexData = Serializer->SerializeFull(indexBatch); - return std::make_shared(indexId, recordsCount, NArrow::GetBatchDataSize(indexBatch), indexData); + return std::make_shared(TChunkAddress(indexId, 0), recordsCount, NArrow::GetBatchDataSize(indexBatch), indexData); } bool TIndexByColumns::DoDeserializeFromProto(const NKikimrSchemeOp::TOlapIndexDescription& /*proto*/) { @@ -60,4 +63,13 @@ NKikimr::TConclusionStatus TIndexByColumns::CheckSameColumnsForModification(cons return TConclusionStatus::Success(); } +bool IIndexMeta::DeserializeFromProto(const NKikimrSchemeOp::TOlapIndexDescription& proto) { + IndexId = proto.GetId(); + AFL_VERIFY(IndexId); + IndexName = proto.GetName(); 
+ AFL_VERIFY(IndexName); + StorageId = proto.GetStorageId() ? proto.GetStorageId() : IStoragesManager::DefaultStorageId; + return DoDeserializeFromProto(proto); +} + } // namespace NKikimr::NOlap::NIndexes \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/meta.h b/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/meta.h index 9966d4ee31ef..1f414cdc0301 100644 --- a/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/meta.h +++ b/ydb/core/tx/columnshard/engines/scheme/indexes/abstract/meta.h @@ -28,8 +28,9 @@ class IIndexMeta { private: YDB_READONLY_DEF(TString, IndexName); YDB_READONLY(ui32, IndexId, 0); + YDB_READONLY(TString, StorageId, IStoragesManager::DefaultStorageId); protected: - virtual std::shared_ptr DoBuildIndex(const ui32 indexId, std::map>>& data, const TIndexInfo& indexInfo) const = 0; + virtual std::shared_ptr DoBuildIndex(const ui32 indexId, THashMap>>& data, const TIndexInfo& indexInfo) const = 0; virtual void DoFillIndexCheckers(const std::shared_ptr& info, const NSchemeShard::TOlapSchema& schema) const = 0; virtual bool DoDeserializeFromProto(const NKikimrSchemeOp::TOlapIndexDescription& proto) = 0; virtual void DoSerializeToProto(NKikimrSchemeOp::TOlapIndexDescription& proto) const = 0; @@ -59,7 +60,7 @@ class IIndexMeta { virtual ~IIndexMeta() = default; - std::shared_ptr BuildIndex(const ui32 indexId, std::map>>& data, const TIndexInfo& indexInfo) const { + std::shared_ptr BuildIndex(const ui32 indexId, THashMap>>& data, const TIndexInfo& indexInfo) const { return DoBuildIndex(indexId, data, indexInfo); } @@ -67,19 +68,16 @@ class IIndexMeta { return DoFillIndexCheckers(info, schema); } - bool DeserializeFromProto(const NKikimrSchemeOp::TOlapIndexDescription& proto) { - IndexId = proto.GetId(); - AFL_VERIFY(IndexId); - IndexName = proto.GetName(); - AFL_VERIFY(IndexName); - return DoDeserializeFromProto(proto); - } + bool DeserializeFromProto(const 
NKikimrSchemeOp::TOlapIndexDescription& proto); void SerializeToProto(NKikimrSchemeOp::TOlapIndexDescription& proto) const { AFL_VERIFY(IndexId); proto.SetId(IndexId); AFL_VERIFY(IndexName); proto.SetName(IndexName); + if (StorageId) { + proto.SetStorageId(StorageId); + } return DoSerializeToProto(proto); } @@ -126,10 +124,10 @@ class TPortionIndexChunk: public IPortionDataChunk { virtual std::shared_ptr DoGetLastScalar() const override { return nullptr; } - virtual void DoAddIntoPortion(const TBlobRange& bRange, TPortionInfo& portionInfo) const override; + virtual void DoAddIntoPortionBeforeBlob(const TBlobRangeLink16& bRange, TPortionInfo& portionInfo) const override; public: - TPortionIndexChunk(const ui32 entityId, const ui32 recordsCount, const ui64 rawBytes, const TString& data) - : TBase(entityId, 0) + TPortionIndexChunk(const TChunkAddress& address, const ui32 recordsCount, const ui64 rawBytes, const TString& data) + : TBase(address.GetColumnId(), address.GetChunkIdx()) , RecordsCount(recordsCount) , RawBytes(rawBytes) , Data(data) @@ -146,7 +144,7 @@ class TIndexByColumns: public IIndexMeta { std::set ColumnIds; virtual std::shared_ptr DoBuildIndexImpl(TChunkedBatchReader& reader) const = 0; - virtual std::shared_ptr DoBuildIndex(const ui32 indexId, std::map>>& data, const TIndexInfo& indexInfo) const override final; + virtual std::shared_ptr DoBuildIndex(const ui32 indexId, THashMap>>& data, const TIndexInfo& indexInfo) const override final; virtual bool DoDeserializeFromProto(const NKikimrSchemeOp::TOlapIndexDescription& /*proto*/) override; TConclusionStatus CheckSameColumnsForModification(const IIndexMeta& newMeta) const; diff --git a/ydb/core/tx/columnshard/engines/scheme/snapshot_scheme.cpp b/ydb/core/tx/columnshard/engines/scheme/snapshot_scheme.cpp index 19f9c39cc438..dd1148e52cf5 100644 --- a/ydb/core/tx/columnshard/engines/scheme/snapshot_scheme.cpp +++ b/ydb/core/tx/columnshard/engines/scheme/snapshot_scheme.cpp @@ -1,53 +1 @@ #include 
"snapshot_scheme.h" - -namespace NKikimr::NOlap { - -TSnapshotSchema::TSnapshotSchema(TIndexInfo&& indexInfo, const TSnapshot& snapshot) - : IndexInfo(std::move(indexInfo)) - , Schema(IndexInfo.ArrowSchemaWithSpecials()) - , Snapshot(snapshot) -{ -} - -TColumnSaver TSnapshotSchema::GetColumnSaver(const ui32 columnId, const TSaverContext& context) const { - return IndexInfo.GetColumnSaver(columnId, context); -} - -std::shared_ptr TSnapshotSchema::GetColumnLoaderOptional(const ui32 columnId) const { - return IndexInfo.GetColumnLoaderOptional(columnId); -} - -std::optional TSnapshotSchema::GetColumnIdOptional(const std::string& columnName) const { - return IndexInfo.GetColumnIdOptional(columnName); -} - -int TSnapshotSchema::GetFieldIndex(const ui32 columnId) const { - const TString& columnName = IndexInfo.GetColumnName(columnId, false); - if (!columnName) { - return -1; - } - std::string name(columnName.data(), columnName.size()); - return Schema->GetFieldIndex(name); -} - -const std::shared_ptr& TSnapshotSchema::GetSchema() const { - return Schema; -} - -const TIndexInfo& TSnapshotSchema::GetIndexInfo() const { - return IndexInfo; -} - -const TSnapshot& TSnapshotSchema::GetSnapshot() const { - return Snapshot; -} - -ui32 TSnapshotSchema::GetColumnsCount() const { - return Schema->num_fields(); -} - -ui64 TSnapshotSchema::GetVersion() const { - return IndexInfo.GetVersion(); -} - -} diff --git a/ydb/core/tx/columnshard/engines/scheme/snapshot_scheme.h b/ydb/core/tx/columnshard/engines/scheme/snapshot_scheme.h index 1104e507729b..b94ede6cb16e 100644 --- a/ydb/core/tx/columnshard/engines/scheme/snapshot_scheme.h +++ b/ydb/core/tx/columnshard/engines/scheme/snapshot_scheme.h @@ -1,38 +1,2 @@ #pragma once - -#include "abstract_scheme.h" - -#include - -namespace NKikimr::NOlap { - -class TSnapshotSchema: public ISnapshotSchema { -private: - TIndexInfo IndexInfo; - std::shared_ptr Schema; - TSnapshot Snapshot; -protected: - virtual TString DoDebugString() const override { 
- return TStringBuilder() << "(" - "schema=" << Schema->ToString() << ";" << - "snapshot=" << Snapshot.DebugString() << ";" << - "index_info=" << IndexInfo.DebugString() << ";" << - ")" - ; - } -public: - TSnapshotSchema(TIndexInfo&& indexInfo, const TSnapshot& snapshot); - - TColumnSaver GetColumnSaver(const ui32 columnId, const TSaverContext& context) const override; - std::shared_ptr GetColumnLoaderOptional(const ui32 columnId) const override; - std::optional GetColumnIdOptional(const std::string& columnName) const override; - int GetFieldIndex(const ui32 columnId) const override; - - const std::shared_ptr& GetSchema() const override; - const TIndexInfo& GetIndexInfo() const override; - const TSnapshot& GetSnapshot() const override; - ui32 GetColumnsCount() const override; - ui64 GetVersion() const override; -}; - -} +#include "versions/snapshot_scheme.h" diff --git a/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/common.cpp b/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/common.cpp new file mode 100644 index 000000000000..e7960e66809e --- /dev/null +++ b/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/common.cpp @@ -0,0 +1,40 @@ +#include "common.h" +#include + +namespace NKikimr::NOlap::NStatistics { + +TIdentifier::TIdentifier(const EType type, const std::vector& entities) + : Type(type) + , EntityIds(entities) +{ + AFL_VERIFY(EntityIds.size()); +} + +bool TIdentifier::operator<(const TIdentifier& item) const { + if (Type != item.Type) { + return (ui32)Type < (ui32)item.Type; + } + for (ui32 i = 0; i < std::min(EntityIds.size(), item.EntityIds.size()); ++i) { + if (EntityIds[i] < item.EntityIds[i]) { + return true; + } + } + return false; +} + +bool TIdentifier::operator==(const TIdentifier& item) const { + if (Type != item.Type) { + return false; + } + if (EntityIds.size() != item.EntityIds.size()) { + return false; + } + for (ui32 i = 0; i < EntityIds.size(); ++i) { + if (EntityIds[i] != item.EntityIds[i]) { + return 
false; + } + } + return true; +} + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/common.h b/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/common.h new file mode 100644 index 000000000000..abfd7159a97b --- /dev/null +++ b/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/common.h @@ -0,0 +1,24 @@ +#pragma once +#include +#include +#include + +namespace NKikimr::NOlap::NStatistics { +enum class EType { + Undefined /* "undefined" */, + Max /* "max" */, + Variability /* "variability" */ +}; + +class TIdentifier { +private: + YDB_READONLY(EType, Type, EType::Undefined); + YDB_READONLY_DEF(std::vector, EntityIds); +public: + TIdentifier(const EType type, const std::vector& entities); + + bool operator<(const TIdentifier& item) const; + bool operator==(const TIdentifier& item) const; +}; + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/constructor.cpp b/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/constructor.cpp new file mode 100644 index 000000000000..5713317c7d21 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/constructor.cpp @@ -0,0 +1,5 @@ +#include "constructor.h" + +namespace NKikimr::NOlap::NStatistics { + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/constructor.h b/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/constructor.h new file mode 100644 index 000000000000..8948e93d482c --- /dev/null +++ b/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/constructor.h @@ -0,0 +1,73 @@ +#pragma once +#include "common.h" +#include "portion_storage.h" +#include "operator.h" + +#include + +namespace NKikimr::NSchemeShard { +class TOlapSchema; +} + +namespace NKikimrColumnShardStatisticsProto { +class TOperatorContainer; +} + +namespace NKikimr::NOlap::NStatistics { + +class IConstructor { +private: + YDB_READONLY(EType, 
Type, EType::Undefined); + IConstructor() = default; +protected: + virtual TConclusion> DoCreateOperator(const NSchemeShard::TOlapSchema& currentSchema) const = 0; + virtual bool DoDeserializeFromProto(const NKikimrColumnShardStatisticsProto::TConstructorContainer& proto) = 0; + virtual void DoSerializeToProto(NKikimrColumnShardStatisticsProto::TConstructorContainer& proto) const = 0; + virtual TConclusionStatus DoDeserializeFromJson(const NJson::TJsonValue& jsonData) = 0; +public: + using TProto = NKikimrColumnShardStatisticsProto::TConstructorContainer; + using TFactory = NObjectFactory::TObjectFactory; + + virtual ~IConstructor() = default; + + IConstructor(const EType type) + :Type(type) { + + } + + TConclusionStatus DeserializeFromJson(const NJson::TJsonValue& jsonData) { + return DoDeserializeFromJson(jsonData); + } + + TConclusion CreateOperator(const TString& name, const NSchemeShard::TOlapSchema& currentSchema) const { + auto result = DoCreateOperator(currentSchema); + if (!result) { + return result.GetError(); + } + return TOperatorContainer(name, result.DetachResult()); + } + + TString GetClassName() const { + return ::ToString(Type); + } + + bool DeserializeFromProto(const NKikimrColumnShardStatisticsProto::TConstructorContainer& proto) { + if (!TryFromString(proto.GetClassName(), Type)) { + return false; + } + return DoDeserializeFromProto(proto); + } + + void SerializeToProto(NKikimrColumnShardStatisticsProto::TConstructorContainer& proto) const { + return DoSerializeToProto(proto); + } +}; + +class TConstructorContainer: public NBackgroundTasks::TInterfaceProtoContainer { +private: + using TBase = NBackgroundTasks::TInterfaceProtoContainer; +public: + using TBase::TBase; +}; + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/operator.cpp b/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/operator.cpp new file mode 100644 index 000000000000..357d8bbd3934 --- /dev/null +++ 
b/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/operator.cpp @@ -0,0 +1,12 @@ +#include "operator.h" + +namespace NKikimr::NOlap::NStatistics { + +bool IOperator::DeserializeFromProto(const NKikimrColumnShardStatisticsProto::TOperatorContainer& proto) { + if (!TryFromString(proto.GetClassName(), Type)) { + return false; + } + return DoDeserializeFromProto(proto); +} + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/operator.h b/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/operator.h new file mode 100644 index 000000000000..29f6f6744ac4 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/operator.h @@ -0,0 +1,124 @@ +#pragma once +#include "common.h" +#include "portion_storage.h" + +#include +#include + +#include +#include + +#include + +namespace NKikimr::NOlap { +class IPortionDataChunk; +} + +namespace NKikimr::NOlap::NStatistics { + +class IOperator { +private: + YDB_READONLY(EType, Type, EType::Undefined); + IOperator() = default; +protected: + virtual void DoFillStatisticsData(const THashMap>>& data, TPortionStorage& portionStats, const IIndexInfo& index) const = 0; + virtual void DoShiftCursor(TPortionStorageCursor& cursor) const = 0; + virtual bool DoDeserializeFromProto(const NKikimrColumnShardStatisticsProto::TOperatorContainer& proto) = 0; + virtual void DoSerializeToProto(NKikimrColumnShardStatisticsProto::TOperatorContainer& proto) const = 0; + virtual void DoCopyData(const TPortionStorageCursor& cursor, const TPortionStorage& portionStatsFrom, TPortionStorage& portionStatsTo) const = 0; +public: + using TProto = NKikimrColumnShardStatisticsProto::TOperatorContainer; + using TFactory = NObjectFactory::TObjectFactory; + + virtual ~IOperator() = default; + + virtual std::vector GetEntityIds() const = 0; + + IOperator(const EType type) + :Type(type) { + + } + + void ShiftCursor(TPortionStorageCursor& cursor) const { + DoShiftCursor(cursor); + } + + 
void CopyData(const TPortionStorageCursor& cursor, const TPortionStorage& portionStatsFrom, TPortionStorage& portionStatsTo) const { + return DoCopyData(cursor, portionStatsFrom, portionStatsTo); + } + + void FillStatisticsData(const THashMap>>& data, TPortionStorage& portionStats, const IIndexInfo& index) const { + DoFillStatisticsData(data, portionStats, index); + } + + TString GetClassName() const { + return ::ToString(Type); + } + + TIdentifier GetIdentifier() const { + return TIdentifier(Type, GetEntityIds()); + } + + bool DeserializeFromProto(const NKikimrColumnShardStatisticsProto::TOperatorContainer& proto); + + void SerializeToProto(NKikimrColumnShardStatisticsProto::TOperatorContainer& proto) const { + return DoSerializeToProto(proto); + } +}; + +class TOperatorContainer: public NBackgroundTasks::TInterfaceProtoContainer { +private: + YDB_READONLY_DEF(TString, Name); + std::optional Cursor; + using TBase = NBackgroundTasks::TInterfaceProtoContainer; +public: + TOperatorContainer() = default; + + TOperatorContainer(const TString& name, const std::shared_ptr& object) + : TBase(object) + , Name(name) + { + AFL_VERIFY(Name); + } + + const TPortionStorageCursor& GetCursorVerified() const { + AFL_VERIFY(Cursor); + return *Cursor; + } + + void SetCursor(const TPortionStorageCursor& cursor) { + AFL_VERIFY(!Cursor); + Cursor = cursor; + } + + std::shared_ptr GetScalarVerified(const TPortionStorage& storage) { + AFL_VERIFY(!!Cursor); + return storage.GetScalarVerified(*Cursor); + } + + NKikimrColumnShardStatisticsProto::TOperatorContainer SerializeToProto() const { + NKikimrColumnShardStatisticsProto::TOperatorContainer result = TBase::SerializeToProto(); + result.SetName(Name); + AFL_VERIFY(Name); + return result; + } + + void SerializeToProto(NKikimrColumnShardStatisticsProto::TOperatorContainer& proto) const { + TBase::SerializeToProto(proto); + proto.SetName(Name); + AFL_VERIFY(Name); + } + + bool DeserializeFromProto(const 
NKikimrColumnShardStatisticsProto::TOperatorContainer& proto) { + Name = proto.GetName(); + if (!Name) { + return false; + } + if (!TBase::DeserializeFromProto(proto)) { + return false; + } + return true; + } +}; + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/portion_storage.cpp b/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/portion_storage.cpp new file mode 100644 index 000000000000..f0d67ecf7d42 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/portion_storage.cpp @@ -0,0 +1,119 @@ +#include "portion_storage.h" +#include +#include + +namespace NKikimr::NOlap::NStatistics { + +NKikimrColumnShardStatisticsProto::TScalar TPortionStorage::ScalarToProto(const arrow::Scalar& scalar) { + NKikimrColumnShardStatisticsProto::TScalar result; + switch (scalar.type->id()) { + case arrow::Type::BOOL: + result.SetBool(static_cast(scalar).value); + break; + case arrow::Type::UINT8: + result.SetUint8(static_cast(scalar).value); + break; + case arrow::Type::UINT16: + result.SetUint16(static_cast(scalar).value); + break; + case arrow::Type::UINT32: + result.SetUint32(static_cast(scalar).value); + break; + case arrow::Type::UINT64: + result.SetUint64(static_cast(scalar).value); + break; + case arrow::Type::INT8: + result.SetInt8(static_cast(scalar).value); + break; + case arrow::Type::INT16: + result.SetInt16(static_cast(scalar).value); + break; + case arrow::Type::INT32: + result.SetInt32(static_cast(scalar).value); + break; + case arrow::Type::INT64: + result.SetInt64(static_cast(scalar).value); + break; + case arrow::Type::DOUBLE: + result.SetDouble(static_cast(scalar).value); + break; + case arrow::Type::FLOAT: + result.SetFloat(static_cast(scalar).value); + break; + case arrow::Type::TIMESTAMP: + { + auto* ts = result.MutableTimestamp(); + ts->SetValue(static_cast(scalar).value); + ts->SetUnit(static_cast(*scalar.type).unit()); + break; + } + default: + 
AFL_VERIFY(false)("problem", "incorrect type for statistics usage")("type", scalar.type->ToString()); + } + return result; +} + +std::shared_ptr TPortionStorage::ProtoToScalar(const NKikimrColumnShardStatisticsProto::TScalar& proto) { + if (proto.HasBool()) { + return std::make_shared(proto.GetBool()); + } else if (proto.HasUint8()) { + return std::make_shared(proto.GetUint8()); + } else if (proto.HasUint16()) { + return std::make_shared(proto.GetUint16()); + } else if (proto.HasUint32()) { + return std::make_shared(proto.GetUint32()); + } else if (proto.HasUint64()) { + return std::make_shared(proto.GetUint64()); + } else if (proto.HasInt8()) { + return std::make_shared(proto.GetInt8()); + } else if (proto.HasInt16()) { + return std::make_shared(proto.GetInt16()); + } else if (proto.HasInt32()) { + return std::make_shared(proto.GetInt32()); + } else if (proto.HasInt64()) { + return std::make_shared(proto.GetInt64()); + } else if (proto.HasDouble()) { + return std::make_shared(proto.GetDouble()); + } else if (proto.HasFloat()) { + return std::make_shared(proto.GetFloat()); + } else if (proto.HasTimestamp()) { + arrow::TimeUnit::type unit = arrow::TimeUnit::type(proto.GetTimestamp().GetUnit()); + return std::make_shared(proto.GetTimestamp().GetValue(), std::make_shared(unit)); + } + AFL_VERIFY(false)("problem", "incorrect statistics proto")("proto", proto.DebugString()); + return nullptr; +} + +std::shared_ptr TPortionStorage::GetScalarVerified(const TPortionStorageCursor& cursor) const { + AFL_VERIFY(cursor.GetScalarsPosition() < Data.size()); + AFL_VERIFY(Data[cursor.GetScalarsPosition()]); + return Data[cursor.GetScalarsPosition()]; +} + +void TPortionStorage::AddScalar(const std::shared_ptr& scalar) { + const auto type = scalar->type->id(); + AFL_VERIFY(type == arrow::Type::BOOL || + type == arrow::Type::UINT8 || type == arrow::Type::UINT16 || type == arrow::Type::UINT32 || type == arrow::Type::UINT64 || + type == arrow::Type::INT8 || type == arrow::Type::INT16 
|| type == arrow::Type::INT32 || type == arrow::Type::INT64 || + type == arrow::Type::DOUBLE || type == arrow::Type::TIMESTAMP || type == arrow::Type::FLOAT) + ("problem", "incorrect_stat_type")("incoming", scalar->type->ToString()); + Data.emplace_back(scalar); +} + +NKikimrColumnShardStatisticsProto::TPortionStorage TPortionStorage::SerializeToProto() const { + NKikimrColumnShardStatisticsProto::TPortionStorage result; + for (auto&& i : Data) { + AFL_VERIFY(i); + *result.AddScalars() = ScalarToProto(*i); + } + return result; +} + +NKikimr::TConclusionStatus TPortionStorage::DeserializeFromProto(const NKikimrColumnShardStatisticsProto::TPortionStorage& proto) { + for (auto&& i : proto.GetScalars()) { + Data.emplace_back(ProtoToScalar(i)); + } + return TConclusionStatus::Success(); +} + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/portion_storage.h b/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/portion_storage.h new file mode 100644 index 000000000000..a3e4b6bcb0dd --- /dev/null +++ b/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/portion_storage.h @@ -0,0 +1,53 @@ +#pragma once + +#include +#include +#include + +#include + +namespace NKikimrColumnShardStatisticsProto { +class TScalar; +class TPortionStorage; +} + +namespace NKikimr::NOlap::NStatistics { +class TPortionStorageCursor { +private: + YDB_READONLY(ui32, ScalarsPosition, 0); +public: + TPortionStorageCursor() = default; + + void AddScalarsPosition(const ui32 shift) { + ScalarsPosition += shift; + } +}; + +class TPortionStorage { +private: + YDB_READONLY_DEF(std::vector>, Data); + static NKikimrColumnShardStatisticsProto::TScalar ScalarToProto(const arrow::Scalar& value); + static std::shared_ptr ProtoToScalar(const NKikimrColumnShardStatisticsProto::TScalar& proto); + TConclusionStatus DeserializeFromProto(const NKikimrColumnShardStatisticsProto::TPortionStorage& proto); + +public: + bool IsEmpty() const { + return 
Data.empty(); + } + + std::shared_ptr GetScalarVerified(const TPortionStorageCursor& cursor) const; + + void AddScalar(const std::shared_ptr& scalar); + + NKikimrColumnShardStatisticsProto::TPortionStorage SerializeToProto() const; + + static TConclusion BuildFromProto(const NKikimrColumnShardStatisticsProto::TPortionStorage& proto) { + TPortionStorage result; + auto parse = result.DeserializeFromProto(proto); + if (!parse) { + return parse; + } + return result; + } +}; +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/ya.make b/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/ya.make new file mode 100644 index 000000000000..f63520354edf --- /dev/null +++ b/ydb/core/tx/columnshard/engines/scheme/statistics/abstract/ya.make @@ -0,0 +1,20 @@ +LIBRARY() + +SRCS( + portion_storage.cpp + constructor.cpp + operator.cpp + common.cpp +) + +PEERDIR( + ydb/core/tx/columnshard/engines/scheme/statistics/protos + ydb/core/tx/columnshard/engines/scheme/abstract + contrib/libs/apache/arrow + ydb/library/actors/core + ydb/library/conclusion +) + +GENERATE_ENUM_SERIALIZATION(common.h) + +END() diff --git a/ydb/core/tx/columnshard/engines/scheme/statistics/max/constructor.cpp b/ydb/core/tx/columnshard/engines/scheme/statistics/max/constructor.cpp new file mode 100644 index 000000000000..a12a27812350 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/scheme/statistics/max/constructor.cpp @@ -0,0 +1,45 @@ +#include "constructor.h" +#include "operator.h" + +namespace NKikimr::NOlap::NStatistics::NMax { + +NKikimr::TConclusion> TConstructor::DoCreateOperator(const NSchemeShard::TOlapSchema& currentSchema) const { + auto column = currentSchema.GetColumns().GetByName(ColumnName); + if (!TOperator::IsAvailableType(column->GetType())) { + return TConclusionStatus::Fail("incorrect type for stat calculation"); + } + return std::make_shared(column->GetId()); +} + +bool TConstructor::DoDeserializeFromProto(const 
NKikimrColumnShardStatisticsProto::TConstructorContainer& proto) { + if (!proto.HasMax()) { + return false; + } + ColumnName = proto.GetMax().GetColumnName(); + if (!ColumnName) { + return false; + } + return true; +} + +void TConstructor::DoSerializeToProto(NKikimrColumnShardStatisticsProto::TConstructorContainer& proto) const { + AFL_VERIFY(!!ColumnName); + proto.MutableMax()->SetColumnName(ColumnName); +} + +NKikimr::TConclusionStatus TConstructor::DoDeserializeFromJson(const NJson::TJsonValue& jsonData) { + if (!jsonData.Has("column_name")) { + return TConclusionStatus::Fail("no column_name field in json description"); + } + TString columnNameLocal; + if (!jsonData["column_name"].GetString(&columnNameLocal)) { + return TConclusionStatus::Fail("incorrect column_name field in json description (no string)"); + } + if (!columnNameLocal) { + return TConclusionStatus::Fail("empty column_name field in json description"); + } + ColumnName = columnNameLocal; + return TConclusionStatus::Success(); +} + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/scheme/statistics/max/constructor.h b/ydb/core/tx/columnshard/engines/scheme/statistics/max/constructor.h new file mode 100644 index 000000000000..695096a63d2f --- /dev/null +++ b/ydb/core/tx/columnshard/engines/scheme/statistics/max/constructor.h @@ -0,0 +1,33 @@ +#pragma once +#include +#include + +#include + +namespace NKikimr::NOlap::NStatistics::NMax { + +class TConstructor: public IConstructor { +private: + using TBase = IConstructor; + static inline const auto Registrator = TFactory::TRegistrator(::ToString(EType::Max)); + YDB_READONLY(TString, ColumnName, 0); +protected: + virtual TConclusion> DoCreateOperator(const NSchemeShard::TOlapSchema& currentSchema) const override; + virtual bool DoDeserializeFromProto(const NKikimrColumnShardStatisticsProto::TConstructorContainer& proto) override; + virtual void DoSerializeToProto(NKikimrColumnShardStatisticsProto::TConstructorContainer& proto) 
const override; + virtual TConclusionStatus DoDeserializeFromJson(const NJson::TJsonValue& jsonData) override; +public: + TConstructor(const TString& columnName) + : TBase(EType::Max) + , ColumnName(columnName) + { + + } + + TConstructor() + :TBase(EType::Max) { + + } +}; + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/scheme/statistics/max/operator.cpp b/ydb/core/tx/columnshard/engines/scheme/statistics/max/operator.cpp new file mode 100644 index 000000000000..8e2c179e077b --- /dev/null +++ b/ydb/core/tx/columnshard/engines/scheme/statistics/max/operator.cpp @@ -0,0 +1,41 @@ +#include "operator.h" +#include +#include +#include + +namespace NKikimr::NOlap::NStatistics::NMax { + +void TOperator::DoFillStatisticsData(const THashMap>>& data, TPortionStorage& portionStats, const IIndexInfo& index) const { + auto loader = index.GetColumnLoaderVerified(EntityId); + auto it = data.find(EntityId); + AFL_VERIFY(it != data.end()); + std::shared_ptr result; + for (auto&& i : it->second) { + auto rb = NArrow::TStatusValidator::GetValid(loader->Apply(i->GetData())); + AFL_VERIFY(rb->num_columns() == 1); + auto res = NArrow::FindMinMaxPosition(rb->column(0)); + auto currentScalarMax = NArrow::TStatusValidator::GetValid(rb->column(0)->GetScalar(res.second)); + if (!result || NArrow::ScalarCompare(result, currentScalarMax) < 0) { + result = currentScalarMax; + } + } + portionStats.AddScalar(result); +} + +bool TOperator::DoDeserializeFromProto(const NKikimrColumnShardStatisticsProto::TOperatorContainer& proto) { + if (!proto.HasMax()) { + return false; + } + EntityId = proto.GetMax().GetEntityId(); + if (!EntityId) { + return false; + } + return true; +} + +void TOperator::DoSerializeToProto(NKikimrColumnShardStatisticsProto::TOperatorContainer& proto) const { + AFL_VERIFY(EntityId); + proto.MutableMax()->SetEntityId(EntityId); +} + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/scheme/statistics/max/operator.h 
b/ydb/core/tx/columnshard/engines/scheme/statistics/max/operator.h new file mode 100644 index 000000000000..50e0a76599eb --- /dev/null +++ b/ydb/core/tx/columnshard/engines/scheme/statistics/max/operator.h @@ -0,0 +1,64 @@ +#pragma once +#include +#include + +namespace NKikimr::NOlap::NStatistics::NMax { + +class TOperator: public IOperator { +private: + using TBase = IOperator; + ui32 EntityId = 0; + static inline auto Registrator = TFactory::TRegistrator(::ToString(EType::Max)); +protected: + virtual void DoCopyData(const TPortionStorageCursor& cursor, const TPortionStorage& portionStatsFrom, TPortionStorage& portionStatsTo) const override { + std::shared_ptr scalar = portionStatsFrom.GetScalarVerified(cursor); + portionStatsTo.AddScalar(scalar); + } + + virtual void DoFillStatisticsData(const THashMap>>& data, TPortionStorage& portionStats, const IIndexInfo& index) const override; + virtual void DoShiftCursor(TPortionStorageCursor& cursor) const override { + cursor.AddScalarsPosition(1); + } + virtual std::vector GetEntityIds() const override { + return {EntityId}; + } + virtual bool DoDeserializeFromProto(const NKikimrColumnShardStatisticsProto::TOperatorContainer& proto) override; + virtual void DoSerializeToProto(NKikimrColumnShardStatisticsProto::TOperatorContainer& proto) const override; +public: + + static bool IsAvailableType(const NScheme::TTypeInfo type) { + switch (type.GetTypeId()) { + case NScheme::NTypeIds::Int8: + case NScheme::NTypeIds::Uint8: + case NScheme::NTypeIds::Int16: + case NScheme::NTypeIds::Uint16: + case NScheme::NTypeIds::Int32: + case NScheme::NTypeIds::Uint32: + case NScheme::NTypeIds::Int64: + case NScheme::NTypeIds::Uint64: + case NScheme::NTypeIds::Timestamp: + case NScheme::NTypeIds::Double: + case NScheme::NTypeIds::Float: + case NScheme::NTypeIds::Datetime: + case NScheme::NTypeIds::Date: + return true; + default: + break; + } + return false; + } + + TOperator() + : TBase(EType::Max) + { + + } + + TOperator(const ui32 
entityId) + : TBase(EType::Max) + , EntityId(entityId) { + + } +}; + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/scheme/statistics/max/ya.make b/ydb/core/tx/columnshard/engines/scheme/statistics/max/ya.make new file mode 100644 index 000000000000..631c95eeb3d8 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/scheme/statistics/max/ya.make @@ -0,0 +1,15 @@ +LIBRARY() + +SRCS( + GLOBAL constructor.cpp + GLOBAL operator.cpp +) + +PEERDIR( + ydb/core/tx/columnshard/engines/scheme/statistics/abstract + ydb/core/tx/columnshard/engines/scheme/abstract + ydb/core/tx/columnshard/splitter/abstract + ydb/core/formats/arrow +) + +END() diff --git a/ydb/core/tx/columnshard/engines/scheme/statistics/protos/data.proto b/ydb/core/tx/columnshard/engines/scheme/statistics/protos/data.proto new file mode 100644 index 000000000000..c99f485d399d --- /dev/null +++ b/ydb/core/tx/columnshard/engines/scheme/statistics/protos/data.proto @@ -0,0 +1,66 @@ +package NKikimrColumnShardStatisticsProto; + +message TScalar { + message TTimestamp { + optional uint64 Value = 1; + optional uint32 Unit = 2; + } + oneof Value { + bool Bool = 1; + uint32 Uint8 = 2; + uint32 Uint16 = 3; + uint32 Uint32 = 4; + uint64 Uint64 = 5; + + int32 Int8 = 6; + int32 Int16 = 7; + int32 Int32 = 8; + int64 Int64 = 9; + + double Double = 10; + + TTimestamp Timestamp = 11; + + float Float = 12; + } +} + +message TPortionStorage { + repeated TScalar Scalars = 1; +} + +message TMaxConstructor { + optional string ColumnName = 3; +} + +message TVariabilityConstructor { + optional string ColumnName = 3; +} + +message TConstructorContainer { + optional string Name = 1; + + optional string ClassName = 40; + oneof Implementation { + TMaxConstructor Max = 41; + TVariabilityConstructor Variability = 42; + } +} + +message TMaxOperator { + optional uint32 EntityId = 1; +} + +message TVariabilityOperator { + optional uint32 EntityId = 1; +} + +message TOperatorContainer { + optional string Name = 1; 
+ + optional string ClassName = 40; + oneof Implementation { + TMaxOperator Max = 41; + TVariabilityOperator Variability = 42; + } +} diff --git a/ydb/core/tx/columnshard/engines/scheme/statistics/protos/ya.make b/ydb/core/tx/columnshard/engines/scheme/statistics/protos/ya.make new file mode 100644 index 000000000000..f72b3b7cf620 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/scheme/statistics/protos/ya.make @@ -0,0 +1,11 @@ +PROTO_LIBRARY() + +SRCS( + data.proto +) + +PEERDIR( + +) + +END() diff --git a/ydb/core/tx/columnshard/engines/scheme/statistics/variability/constructor.cpp b/ydb/core/tx/columnshard/engines/scheme/statistics/variability/constructor.cpp new file mode 100644 index 000000000000..25840673fcb5 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/scheme/statistics/variability/constructor.cpp @@ -0,0 +1,45 @@ +#include "constructor.h" +#include "operator.h" + +namespace NKikimr::NOlap::NStatistics::NVariability { + +NKikimr::TConclusion> TConstructor::DoCreateOperator(const NSchemeShard::TOlapSchema& currentSchema) const { + auto column = currentSchema.GetColumns().GetByName(ColumnName); + if (!TOperator::IsAvailableType(column->GetType())) { + return TConclusionStatus::Fail("incorrect type for stat calculation"); + } + return std::make_shared(column->GetId()); +} + +bool TConstructor::DoDeserializeFromProto(const NKikimrColumnShardStatisticsProto::TConstructorContainer& proto) { + if (!proto.HasVariability()) { + return false; + } + ColumnName = proto.GetVariability().GetColumnName(); + if (!ColumnName) { + return false; + } + return true; +} + +void TConstructor::DoSerializeToProto(NKikimrColumnShardStatisticsProto::TConstructorContainer& proto) const { + AFL_VERIFY(!!ColumnName); + proto.MutableVariability()->SetColumnName(ColumnName); +} + +NKikimr::TConclusionStatus TConstructor::DoDeserializeFromJson(const NJson::TJsonValue& jsonData) { + if (!jsonData.Has("column_name")) { + return TConclusionStatus::Fail("no column_name field in json 
description"); + } + TString columnNameLocal; + if (!jsonData["column_name"].GetString(&columnNameLocal)) { + return TConclusionStatus::Fail("incorrect column_name field in json description (no string)"); + } + if (!columnNameLocal) { + return TConclusionStatus::Fail("empty column_name field in json description"); + } + ColumnName = columnNameLocal; + return TConclusionStatus::Success(); +} + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/scheme/statistics/variability/constructor.h b/ydb/core/tx/columnshard/engines/scheme/statistics/variability/constructor.h new file mode 100644 index 000000000000..809c9043faac --- /dev/null +++ b/ydb/core/tx/columnshard/engines/scheme/statistics/variability/constructor.h @@ -0,0 +1,33 @@ +#pragma once +#include +#include + +#include + +namespace NKikimr::NOlap::NStatistics::NVariability { + +class TConstructor: public IConstructor { +private: + using TBase = IConstructor; + static inline const auto Registrator = TFactory::TRegistrator(::ToString(EType::Variability)); + YDB_READONLY(TString, ColumnName, 0); +protected: + virtual TConclusion> DoCreateOperator(const NSchemeShard::TOlapSchema& currentSchema) const override; + virtual bool DoDeserializeFromProto(const NKikimrColumnShardStatisticsProto::TConstructorContainer& proto) override; + virtual void DoSerializeToProto(NKikimrColumnShardStatisticsProto::TConstructorContainer& proto) const override; + virtual TConclusionStatus DoDeserializeFromJson(const NJson::TJsonValue& jsonData) override; +public: + TConstructor(const TString& columnName) + : TBase(EType::Max) + , ColumnName(columnName) + { + + } + + TConstructor() + :TBase(EType::Variability) { + + } +}; + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/scheme/statistics/variability/operator.cpp b/ydb/core/tx/columnshard/engines/scheme/statistics/variability/operator.cpp new file mode 100644 index 000000000000..d43d617171bb --- /dev/null +++ 
b/ydb/core/tx/columnshard/engines/scheme/statistics/variability/operator.cpp @@ -0,0 +1,164 @@ +#include "operator.h" +#include +#include +#include + +namespace NKikimr::NOlap::NStatistics::NVariability { + +class IValuesContainer { +protected: + std::optional DataType; + ui32 DifferentCount = 0; + + virtual void DoAddArray(const std::shared_ptr& array) = 0; +public: + virtual ~IValuesContainer() = default; + ui32 GetDifferentCount() const { + return DifferentCount; + } + + void AddArray(const std::shared_ptr& array) { + if (!DataType) { + DataType = array->type_id(); + } else { + AFL_VERIFY(DataType == array->type_id())("base", (ui32)*DataType)("to", (ui32)array->type_id()); + } + return DoAddArray(array); + } +}; + +template +class TCTypeValuesContainer: public IValuesContainer { +private: + using TWrap = TArrowElement; + using TArray = typename arrow::TypeTraits::ArrayType; + using TCType = typename TWrap::T::c_type; + using TCContainer = THashSet; + + TCContainer ElementsStorage; +protected: + virtual void DoAddArray(const std::shared_ptr& array) override { + NArrow::SwitchType(array->type_id(), [&](const auto& type) { + using TWrap = std::decay_t; + if constexpr (std::is_same_v) { + const TArray& arrTyped = static_cast(*array); + for (ui32 i = 0; i < array->length(); ++i) { + if constexpr (arrow::has_c_type()) { + if (ElementsStorage.emplace(arrTyped.Value(i)).second) { + ++DifferentCount; + } + continue; + } + AFL_VERIFY(false); + } + return true; + } + AFL_VERIFY(false); + return false; + }); + } +}; + +template +class TStringValuesContainer: public IValuesContainer { +private: + using TWrap = TArrowElement; + using TArray = typename arrow::TypeTraits::ArrayType; + using TCType = TString; + using TCContainer = THashSet; + + TCContainer ElementsStorage; +protected: + virtual void DoAddArray(const std::shared_ptr& array) override { + NArrow::SwitchType(array->type_id(), [&](const auto& type) { + using TWrap = std::decay_t; + if constexpr (std::is_same_v) { + 
const TArray& arrTyped = static_cast(*array); + for (ui32 i = 0; i < array->length(); ++i) { + if constexpr (arrow::has_string_view()) { + auto value = arrTyped.GetView(i); + if (ElementsStorage.emplace(value.data(), value.size()).second) { + ++DifferentCount; + } + continue; + } + AFL_VERIFY(false); + } + return true; + } + AFL_VERIFY(false); + return false; + }); + } +}; + +class TDifferentElementsAggregator { /* counts distinct values over the arrays passed to AddArray */ +private: + std::shared_ptr Container; +public: + TDifferentElementsAggregator() = default; + + bool HasData() const { + return !!Container; + } + + ui32 GetDifferentCount() const { + return Container ? Container->GetDifferentCount() : 0; + } + + void AddArray(const std::shared_ptr& array) { /* creates the container on the first call, then feeds every array to it exactly once */ + if (!Container) { + NArrow::SwitchType(array->type_id(), [&](const auto& type) { + using TWrap = std::decay_t; + if (!Container) { + if constexpr (arrow::has_c_type()) { + Container = std::make_shared>(); + /* the array itself is added by the single call after SwitchType */ + return true; + } + if constexpr (arrow::has_string_view()) { + Container = std::make_shared>(); + /* the array itself is added by the single call after SwitchType */ + return true; + } + AFL_VERIFY(false); + } + return false; + }); + } + Container->AddArray(array); + } +}; + +void TOperator::DoFillStatisticsData(const THashMap>>& data, TPortionStorage& portionStats, const IIndexInfo& index) const { + auto it = data.find(EntityId); + AFL_VERIFY(it != data.end()); + auto loader = index.GetColumnLoaderVerified(EntityId); + std::shared_ptr result; + TDifferentElementsAggregator aggregator; + for (auto&& i : it->second) { + auto rb = NArrow::TStatusValidator::GetValid(loader->Apply(i->GetData())); + AFL_VERIFY(rb->num_columns() == 1); + aggregator.AddArray(rb->column(0)); + } + AFL_VERIFY(aggregator.HasData()); + portionStats.AddScalar(std::make_shared(aggregator.GetDifferentCount())); +} + +bool TOperator::DoDeserializeFromProto(const NKikimrColumnShardStatisticsProto::TOperatorContainer& proto) { + if (!proto.HasVariability()) { + return false; + } + EntityId = 
proto.GetVariability().GetEntityId(); + if (!EntityId) { + return false; + } + return true; +} + +void TOperator::DoSerializeToProto(NKikimrColumnShardStatisticsProto::TOperatorContainer& proto) const { + AFL_VERIFY(EntityId); + proto.MutableVariability()->SetEntityId(EntityId); +} + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/scheme/statistics/variability/operator.h b/ydb/core/tx/columnshard/engines/scheme/statistics/variability/operator.h new file mode 100644 index 000000000000..ac0fdd110030 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/scheme/statistics/variability/operator.h @@ -0,0 +1,67 @@ +#pragma once +#include +#include + +namespace NKikimr::NOlap::NStatistics::NVariability { + +class TOperator: public IOperator { +private: + using TBase = IOperator; + ui32 EntityId = 0; + static inline auto Registrator = TFactory::TRegistrator(::ToString(EType::Variability)); +protected: + virtual void DoCopyData(const TPortionStorageCursor& cursor, const TPortionStorage& portionStatsFrom, TPortionStorage& portionStatsTo) const override { + std::shared_ptr scalar = portionStatsFrom.GetScalarVerified(cursor); + portionStatsTo.AddScalar(scalar); + } + + virtual void DoFillStatisticsData(const THashMap>>& data, TPortionStorage& portionStats, const IIndexInfo& index) const override; + virtual void DoShiftCursor(TPortionStorageCursor& cursor) const override { + cursor.AddScalarsPosition(1); + } + virtual std::vector GetEntityIds() const override { + return {EntityId}; + } + virtual bool DoDeserializeFromProto(const NKikimrColumnShardStatisticsProto::TOperatorContainer& proto) override; + virtual void DoSerializeToProto(NKikimrColumnShardStatisticsProto::TOperatorContainer& proto) const override; +public: + + static bool IsAvailableType(const NScheme::TTypeInfo type) { + switch (type.GetTypeId()) { + case NScheme::NTypeIds::Int8: + case NScheme::NTypeIds::Uint8: + case NScheme::NTypeIds::Int16: + case NScheme::NTypeIds::Uint16: + case 
NScheme::NTypeIds::Int32: + case NScheme::NTypeIds::Uint32: + case NScheme::NTypeIds::Int64: + case NScheme::NTypeIds::Uint64: + case NScheme::NTypeIds::String: + case NScheme::NTypeIds::Utf8: + case NScheme::NTypeIds::Uuid: + case NScheme::NTypeIds::Timestamp: + case NScheme::NTypeIds::Double: + case NScheme::NTypeIds::Float: + case NScheme::NTypeIds::Datetime: + case NScheme::NTypeIds::Date: + return true; + default: + break; + } + return false; + } + + TOperator() + : TBase(EType::Variability) + { + + } + + TOperator(const ui32 entityId) + : TBase(EType::Variability) + , EntityId(entityId) { + + } +}; + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/scheme/statistics/variability/ya.make b/ydb/core/tx/columnshard/engines/scheme/statistics/variability/ya.make new file mode 100644 index 000000000000..631c95eeb3d8 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/scheme/statistics/variability/ya.make @@ -0,0 +1,15 @@ +LIBRARY() + +SRCS( + GLOBAL constructor.cpp + GLOBAL operator.cpp +) + +PEERDIR( + ydb/core/tx/columnshard/engines/scheme/statistics/abstract + ydb/core/tx/columnshard/engines/scheme/abstract + ydb/core/tx/columnshard/splitter/abstract + ydb/core/formats/arrow +) + +END() diff --git a/ydb/core/tx/columnshard/engines/scheme/statistics/ya.make b/ydb/core/tx/columnshard/engines/scheme/statistics/ya.make new file mode 100644 index 000000000000..3baed9c3538a --- /dev/null +++ b/ydb/core/tx/columnshard/engines/scheme/statistics/ya.make @@ -0,0 +1,10 @@ +LIBRARY() + +PEERDIR( + ydb/core/tx/columnshard/engines/scheme/statistics/abstract + ydb/core/tx/columnshard/engines/scheme/statistics/max + ydb/core/tx/columnshard/engines/scheme/statistics/variability + ydb/core/tx/columnshard/engines/scheme/statistics/protos +) + +END() diff --git a/ydb/core/tx/columnshard/engines/scheme/tier_info.cpp b/ydb/core/tx/columnshard/engines/scheme/tier_info.cpp index 1672d7a97ac7..d49c21ed0647 100644 --- 
a/ydb/core/tx/columnshard/engines/scheme/tier_info.cpp +++ b/ydb/core/tx/columnshard/engines/scheme/tier_info.cpp @@ -1,21 +1 @@ #include "tier_info.h" - -namespace NKikimr::NOlap { - -std::optional TTierInfo::ScalarToInstant(const std::shared_ptr& scalar) const { - const ui64 unitsInSeconds = TtlUnitsInSecond ? TtlUnitsInSecond : 1; - switch (scalar->type->id()) { - case arrow::Type::TIMESTAMP: - return TInstant::MicroSeconds(std::static_pointer_cast(scalar)->value); - case arrow::Type::UINT16: // YQL Date - return TInstant::Days(std::static_pointer_cast(scalar)->value); - case arrow::Type::UINT32: // YQL Datetime or Uint32 - return TInstant::MicroSeconds(std::static_pointer_cast(scalar)->value / (1.0 * unitsInSeconds / 1000000)); - case arrow::Type::UINT64: - return TInstant::MicroSeconds(std::static_pointer_cast(scalar)->value / (1.0 * unitsInSeconds / 1000000)); - default: - return {}; - } -} - -} diff --git a/ydb/core/tx/columnshard/engines/scheme/tier_info.h b/ydb/core/tx/columnshard/engines/scheme/tier_info.h index 395752201f8d..38c92a8aca16 100644 --- a/ydb/core/tx/columnshard/engines/scheme/tier_info.h +++ b/ydb/core/tx/columnshard/engines/scheme/tier_info.h @@ -1,167 +1,2 @@ #pragma once - -#include -#include -#include -#include -#include -#include -#include - -namespace NKikimr::NOlap { - -class TTierInfo { -private: - YDB_READONLY_DEF(TString, Name); - YDB_READONLY_DEF(TString, EvictColumnName); - YDB_READONLY_DEF(TDuration, EvictDuration); - - ui32 TtlUnitsInSecond; - YDB_READONLY_DEF(std::optional, Serializer); -public: - TTierInfo(const TString& tierName, TDuration evictDuration, const TString& column, ui32 unitsInSecond = 0) - : Name(tierName) - , EvictColumnName(column) - , EvictDuration(evictDuration) - , TtlUnitsInSecond(unitsInSecond) - { - Y_ABORT_UNLESS(!!Name); - Y_ABORT_UNLESS(!!EvictColumnName); - } - - TInstant GetEvictInstant(const TInstant now) const { - return now - EvictDuration; - } - - TTierInfo& SetSerializer(const 
NArrow::NSerialization::TSerializerContainer& value) { - Serializer = value; - return *this; - } - - std::shared_ptr GetEvictColumn(const std::shared_ptr& schema) const { - return schema->GetFieldByName(EvictColumnName); - } - - std::optional ScalarToInstant(const std::shared_ptr& scalar) const; - - static std::shared_ptr MakeTtl(const TDuration evictDuration, const TString& ttlColumn, ui32 unitsInSecond = 0) { - return std::make_shared("TTL", evictDuration, ttlColumn, unitsInSecond); - } - - TString GetDebugString() const { - TStringBuilder sb; - sb << "name=" << Name << ";duration=" << EvictDuration << ";column=" << EvictColumnName << ";serializer="; - if (Serializer) { - sb << Serializer->DebugString(); - } else { - sb << "NOT_SPECIFIED(Default)"; - } - sb << ";"; - return sb; - } -}; - -class TTierRef { -public: - TTierRef(const std::shared_ptr& tierInfo) - : Info(tierInfo) - { - Y_ABORT_UNLESS(tierInfo); - } - - bool operator < (const TTierRef& b) const { - if (Info->GetEvictDuration() > b.Info->GetEvictDuration()) { - return true; - } else if (Info->GetEvictDuration() == b.Info->GetEvictDuration()) { - return Info->GetName() > b.Info->GetName(); // add stability: smaller name is hotter - } - return false; - } - - bool operator == (const TTierRef& b) const { - return Info->GetEvictDuration() == b.Info->GetEvictDuration() - && Info->GetName() == b.Info->GetName(); - } - - const TTierInfo& Get() const { - return *Info; - } - - std::shared_ptr GetPtr() const { - return Info; - } - -private: - std::shared_ptr Info; -}; - -class TTiering { - using TTiersMap = THashMap>; - TTiersMap TierByName; - TSet OrderedTiers; -public: - - std::shared_ptr Ttl; - - const TTiersMap& GetTierByName() const { - return TierByName; - } - - const TSet& GetOrderedTiers() const { - return OrderedTiers; - } - - bool HasTiers() const { - return !OrderedTiers.empty(); - } - - void Add(const std::shared_ptr& tier) { - if (HasTiers()) { - // TODO: support different ttl columns - 
Y_ABORT_UNLESS(tier->GetEvictColumnName() == OrderedTiers.begin()->Get().GetEvictColumnName()); - } - - TierByName.emplace(tier->GetName(), tier); - OrderedTiers.emplace(tier); - } - - TString GetHottestTierName() const { - if (OrderedTiers.size()) { - return OrderedTiers.rbegin()->Get().GetName(); // hottest one - } - return {}; - } - - std::optional GetSerializer(const TString& name) const { - auto it = TierByName.find(name); - if (it != TierByName.end()) { - Y_ABORT_UNLESS(!name.empty()); - return it->second->GetSerializer(); - } - return {}; - } - - THashSet GetTtlColumns() const { - THashSet out; - if (Ttl) { - out.insert(Ttl->GetEvictColumnName()); - } - for (auto& [tierName, tier] : TierByName) { - out.insert(tier->GetEvictColumnName()); - } - return out; - } - - TString GetDebugString() const { - TStringBuilder sb; - if (Ttl) { - sb << Ttl->GetDebugString() << "; "; - } - for (auto&& i : OrderedTiers) { - sb << i.Get().GetDebugString() << "; "; - } - return sb; - } -}; - -} +#include "tiering/tier_info.h" \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/scheme/tiering/common.cpp b/ydb/core/tx/columnshard/engines/scheme/tiering/common.cpp new file mode 100644 index 000000000000..e60d56d39c91 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/scheme/tiering/common.cpp @@ -0,0 +1,5 @@ +#include "common.h" + +namespace NKikimr::NOlap::NTiering::NCommon { + +} diff --git a/ydb/core/tx/columnshard/engines/scheme/tiering/common.h b/ydb/core/tx/columnshard/engines/scheme/tiering/common.h new file mode 100644 index 000000000000..ab6ed9a05a7e --- /dev/null +++ b/ydb/core/tx/columnshard/engines/scheme/tiering/common.h @@ -0,0 +1,9 @@ +#pragma once + +#include + +namespace NKikimr::NOlap::NTiering::NCommon { + +static inline const TString DeleteTierName = "$$DELETE"; + +} diff --git a/ydb/core/tx/columnshard/engines/scheme/tiering/tier_info.cpp b/ydb/core/tx/columnshard/engines/scheme/tiering/tier_info.cpp new file mode 100644 index 
000000000000..659062338bb0 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/scheme/tiering/tier_info.cpp @@ -0,0 +1,42 @@ +#include "tier_info.h" +#include + +namespace NKikimr::NOlap { + +std::optional TTierInfo::ScalarToInstant(const std::shared_ptr& scalar) const { + const ui64 unitsInSeconds = TtlUnitsInSecond ? TtlUnitsInSecond : 1; + switch (scalar->type->id()) { + case arrow::Type::TIMESTAMP: + return TInstant::MicroSeconds(std::static_pointer_cast(scalar)->value); + case arrow::Type::UINT16: // YQL Date + return TInstant::Days(std::static_pointer_cast(scalar)->value); + case arrow::Type::UINT32: // YQL Datetime or Uint32 + return TInstant::MicroSeconds(std::static_pointer_cast(scalar)->value / (1.0 * unitsInSeconds / 1000000)); + case arrow::Type::UINT64: + return TInstant::MicroSeconds(std::static_pointer_cast(scalar)->value / (1.0 * unitsInSeconds / 1000000)); + default: + return {}; + } +} + +TTiering::TTieringContext TTiering::GetTierToMove(const std::shared_ptr& max, const TInstant now) const { + AFL_VERIFY(OrderedTiers.size()); + std::optional nextTierName; + std::optional nextTierDuration; + for (auto& tierRef : GetOrderedTiers()) { + auto& tierInfo = tierRef.Get(); + auto mpiOpt = tierInfo.ScalarToInstant(max); + Y_ABORT_UNLESS(mpiOpt); + const TInstant maxTieringPortionInstant = *mpiOpt; + const TDuration dWaitLocal = maxTieringPortionInstant - tierInfo.GetEvictInstant(now); + if (!dWaitLocal) { + return TTieringContext(tierInfo.GetName(), tierInfo.GetEvictInstant(now) - maxTieringPortionInstant, nextTierName, nextTierDuration); + } else { + nextTierName = tierInfo.GetName(); + nextTierDuration = dWaitLocal; + } + } + return TTieringContext(IStoragesManager::DefaultStorageId, TDuration::Zero(), nextTierName, nextTierDuration); +} + +} diff --git a/ydb/core/tx/columnshard/engines/scheme/tiering/tier_info.h b/ydb/core/tx/columnshard/engines/scheme/tiering/tier_info.h new file mode 100644 index 000000000000..c65cb1703ad0 --- /dev/null +++ 
b/ydb/core/tx/columnshard/engines/scheme/tiering/tier_info.h @@ -0,0 +1,215 @@ +#pragma once +#include "common.h" + +#include +#include +#include +#include +#include +#include +#include + +namespace NKikimr::NOlap { + +class TTierInfo { +private: + YDB_READONLY_DEF(TString, Name); + YDB_READONLY_DEF(TString, EvictColumnName); + YDB_READONLY_DEF(TDuration, EvictDuration); + + ui32 TtlUnitsInSecond; + YDB_READONLY_DEF(std::optional, Serializer); +public: + static TString GetTtlTierName() { + return NTiering::NCommon::DeleteTierName; + } + + TTierInfo(const TString& tierName, TDuration evictDuration, const TString& column, ui32 unitsInSecond = 0) + : Name(tierName) + , EvictColumnName(column) + , EvictDuration(evictDuration) + , TtlUnitsInSecond(unitsInSecond) + { + Y_ABORT_UNLESS(!!Name); + Y_ABORT_UNLESS(!!EvictColumnName); + } + + TInstant GetEvictInstant(const TInstant now) const { + return now - EvictDuration; + } + + TTierInfo& SetSerializer(const NArrow::NSerialization::TSerializerContainer& value) { + Serializer = value; + return *this; + } + + std::shared_ptr GetEvictColumn(const std::shared_ptr& schema) const { + return schema->GetFieldByName(EvictColumnName); + } + + std::optional ScalarToInstant(const std::shared_ptr& scalar) const; + + static std::shared_ptr MakeTtl(const TDuration evictDuration, const TString& ttlColumn, ui32 unitsInSecond = 0) { + return std::make_shared(NTiering::NCommon::DeleteTierName, evictDuration, ttlColumn, unitsInSecond); + } + + TString GetDebugString() const { + TStringBuilder sb; + sb << "name=" << Name << ";duration=" << EvictDuration << ";column=" << EvictColumnName << ";serializer="; + if (Serializer) { + sb << Serializer->DebugString(); + } else { + sb << "NOT_SPECIFIED(Default)"; + } + sb << ";"; + return sb; + } +}; + +class TTierRef { +public: + TTierRef(const std::shared_ptr& tierInfo) + : Info(tierInfo) + { + Y_ABORT_UNLESS(tierInfo); + } + + bool operator < (const TTierRef& b) const { + if (Info->GetEvictDuration() 
> b.Info->GetEvictDuration()) { + return true; + } else if (Info->GetEvictDuration() == b.Info->GetEvictDuration()) { + if (b.Info->GetName() == NTiering::NCommon::DeleteTierName) { /* right-hand side is checked first so two delete tiers compare as equivalent (irreflexive order) */ + return false; + } else if (Info->GetName() == NTiering::NCommon::DeleteTierName) { /* with equal durations the delete tier sorts before any other tier */ + return true; + } + return Info->GetName() > b.Info->GetName(); // add stability: smaller name is hotter + } + return false; + } + + bool operator == (const TTierRef& b) const { + return Info->GetEvictDuration() == b.Info->GetEvictDuration() + && Info->GetName() == b.Info->GetName(); + } + + const TTierInfo& Get() const { + return *Info; + } + + std::shared_ptr GetPtr() const { + return Info; + } + +private: + std::shared_ptr Info; +}; + +class TTiering { + using TTiersMap = THashMap>; + TTiersMap TierByName; + TSet OrderedTiers; + TString TTLColumnName; +public: + + class TTieringContext { + private: + YDB_READONLY_DEF(TString, CurrentTierName); + YDB_READONLY_DEF(TDuration, CurrentTierLag); + + YDB_READONLY_DEF(std::optional, NextTierName); + YDB_READONLY_DEF(std::optional, NextTierWaiting); + public: + TString DebugString() const { + TStringBuilder sb; + sb << CurrentTierName << "/" << CurrentTierLag << ";"; + if (NextTierName) { + sb << *NextTierName << "/" << *NextTierWaiting << ";"; + } + return sb; + } + + TTieringContext(const TString& tierName, const TDuration waiting, const std::optional& nextTierName = {}, const std::optional& nextTierDuration = {}) + : CurrentTierName(tierName) + , CurrentTierLag(waiting) + , NextTierName(nextTierName) + , NextTierWaiting(nextTierDuration) + { + AFL_VERIFY(!nextTierName == !nextTierDuration); + } + + TString GetNextTierNameVerified() const { + AFL_VERIFY(NextTierName); + return *NextTierName; + } + + TDuration GetNextTierWaitingVerified() const { + AFL_VERIFY(NextTierWaiting); + return *NextTierWaiting; + } + }; + + TTieringContext GetTierToMove(const std::shared_ptr& max, const TInstant now) const; + + const TTiersMap& GetTierByName() const { + return 
TierByName; + } + + std::shared_ptr GetTierByName(const TString& name) const { + auto it = TierByName.find(name); + if (it == TierByName.end()) { + return nullptr; + } + return it->second; + } + + const TSet& GetOrderedTiers() const { + return OrderedTiers; + } + + bool HasTiers() const { + return !OrderedTiers.empty(); + } + + [[nodiscard]] bool Add(const std::shared_ptr& tier) { + AFL_VERIFY(tier); + if (!TTLColumnName) { + TTLColumnName = tier->GetEvictColumnName(); + } else if (TTLColumnName != tier->GetEvictColumnName()) { + AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("problem", "incorrect_tiering_metadata")("column_before", TTLColumnName)("column_new", tier->GetEvictColumnName()); + return false; + } + + TierByName.emplace(tier->GetName(), tier); + OrderedTiers.emplace(tier); + return true; + } + + std::optional GetSerializer(const TString& name) const { + auto it = TierByName.find(name); + if (it != TierByName.end()) { + Y_ABORT_UNLESS(!name.empty()); + return it->second->GetSerializer(); + } + return {}; + } + + const TString& GetTtlColumn() const { + AFL_VERIFY(TTLColumnName); + return TTLColumnName; + } + + const TString& GetEvictColumnName() const { + return TTLColumnName; + } + + TString GetDebugString() const { + TStringBuilder sb; + for (auto&& i : OrderedTiers) { + sb << i.Get().GetDebugString() << "; "; + } + return sb; + } +}; + +} diff --git a/ydb/core/tx/columnshard/engines/scheme/tiering/ya.make b/ydb/core/tx/columnshard/engines/scheme/tiering/ya.make new file mode 100644 index 000000000000..45f21958d0d8 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/scheme/tiering/ya.make @@ -0,0 +1,12 @@ +LIBRARY() + +SRCS( + tier_info.cpp + common.cpp +) + +PEERDIR( + ydb/core/formats/arrow/serializer +) + +END() diff --git a/ydb/core/tx/columnshard/engines/scheme/versions/abstract_scheme.cpp b/ydb/core/tx/columnshard/engines/scheme/versions/abstract_scheme.cpp new file mode 100644 index 000000000000..771ebe52e783 --- /dev/null +++ 
b/ydb/core/tx/columnshard/engines/scheme/versions/abstract_scheme.cpp @@ -0,0 +1,141 @@ +#include "abstract_scheme.h" + +#include +#include +#include + +namespace NKikimr::NOlap { + +std::shared_ptr ISnapshotSchema::GetFieldByIndex(const int index) const { + auto schema = GetSchema(); + if (!schema || index < 0 || index >= schema->num_fields()) { + return nullptr; + } + return schema->field(index); +} +std::shared_ptr ISnapshotSchema::GetFieldByColumnIdOptional(const ui32 columnId) const { + return GetFieldByIndex(GetFieldIndex(columnId)); +} + +std::set ISnapshotSchema::GetPkColumnsIds() const { + std::set result; + for (auto&& field : GetIndexInfo().GetReplaceKey()->fields()) { + result.emplace(GetColumnId(field->name())); + } + return result; + +} + +std::shared_ptr ISnapshotSchema::NormalizeBatch(const ISnapshotSchema& dataSchema, const std::shared_ptr batch) const { + if (dataSchema.GetSnapshot() == GetSnapshot()) { + return batch; + } + Y_ABORT_UNLESS(dataSchema.GetSnapshot() < GetSnapshot()); + const std::shared_ptr& resultArrowSchema = GetSchema(); + std::vector> newColumns; + newColumns.reserve(resultArrowSchema->num_fields()); + + for (size_t i = 0; i < resultArrowSchema->fields().size(); ++i) { + auto& resultField = resultArrowSchema->fields()[i]; + auto columnId = GetIndexInfo().GetColumnId(resultField->name()); + auto oldColumnIndex = dataSchema.GetFieldIndex(columnId); + if (oldColumnIndex >= 0) { // ColumnExists + auto oldColumnInfo = dataSchema.GetFieldByIndex(oldColumnIndex); + Y_ABORT_UNLESS(oldColumnInfo); + auto columnData = batch->GetColumnByName(oldColumnInfo->name()); + Y_ABORT_UNLESS(columnData); + newColumns.push_back(columnData); + } else { // AddNullColumn + auto nullColumn = NArrow::MakeEmptyBatch(arrow::schema({resultField}), batch->num_rows()); + newColumns.push_back(nullColumn->column(0)); + } + } + return arrow::RecordBatch::Make(resultArrowSchema, batch->num_rows(), newColumns); +} + +std::shared_ptr 
ISnapshotSchema::PrepareForInsert(const TString& data, const std::shared_ptr& dataSchema) const { + std::shared_ptr dstSchema = GetIndexInfo().ArrowSchema(); + auto batch = NArrow::DeserializeBatch(data, (dataSchema ? dataSchema : dstSchema)); + if (!batch) { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("error", "DeserializeBatch() failed"); + return nullptr; + } + if (batch->num_rows() == 0) { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("error", "empty batch"); + return nullptr; + } + + // Correct schema + if (dataSchema) { + batch = NArrow::ExtractColumns(batch, dstSchema, true); + if (!batch) { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("error", "cannot correct schema"); + return nullptr; + } + } + + if (!batch->schema()->Equals(dstSchema)) { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("error", TStringBuilder() << "unexpected schema for insert batch: '" << batch->schema()->ToString() << "'"); + return nullptr; + } + + const auto& sortingKey = GetIndexInfo().GetPrimaryKey(); + Y_ABORT_UNLESS(sortingKey); + + // Check PK is NOT NULL + for (auto& field : sortingKey->fields()) { + auto column = batch->GetColumnByName(field->name()); + if (!column) { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("error", TStringBuilder() << "missing PK column '" << field->name() << "'"); + return nullptr; + } + if (NArrow::HasNulls(column)) { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("error", TStringBuilder() << "PK column '" << field->name() << "' contains NULLs"); + return nullptr; + } + } + + auto status = batch->ValidateFull(); + if (!status.ok()) { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("error", status.ToString()); + return nullptr; + } + batch = NArrow::SortBatch(batch, sortingKey, true); + Y_DEBUG_ABORT_UNLESS(NArrow::IsSortedAndUnique(batch, sortingKey)); + return batch; +} + +ui32 ISnapshotSchema::GetColumnId(const std::string& columnName) const { + auto id = GetColumnIdOptional(columnName); + AFL_VERIFY(id)("column_name", columnName)("schema", JoinSeq(",", 
GetSchema()->field_names())); + return *id; +} + +std::shared_ptr ISnapshotSchema::GetFieldByColumnIdVerified(const ui32 columnId) const { + auto result = GetFieldByColumnIdOptional(columnId); + AFL_VERIFY(result)("event", "unknown_column")("column_id", columnId)("schema", DebugString()); + return result; +} + +std::shared_ptr ISnapshotSchema::GetColumnLoaderVerified(const ui32 columnId) const { + auto result = GetColumnLoaderOptional(columnId); + AFL_VERIFY(result); + return result; +} + +std::shared_ptr ISnapshotSchema::GetColumnLoaderVerified(const std::string& columnName) const { + auto result = GetColumnLoaderOptional(columnName); + AFL_VERIFY(result); + return result; +} + +std::shared_ptr ISnapshotSchema::GetColumnLoaderOptional(const std::string& columnName) const { + const std::optional id = GetColumnIdOptional(columnName); + if (id) { + return GetColumnLoaderOptional(*id); + } else { + return nullptr; + } +} + +} diff --git a/ydb/core/tx/columnshard/engines/scheme/versions/abstract_scheme.h b/ydb/core/tx/columnshard/engines/scheme/versions/abstract_scheme.h new file mode 100644 index 000000000000..0fc4047de574 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/scheme/versions/abstract_scheme.h @@ -0,0 +1,57 @@ +#pragma once +#include +#include + +#include + +#include + +namespace NKikimr::NOlap { + +struct TIndexInfo; +class TSaverContext; + +class ISnapshotSchema { +protected: + virtual TString DoDebugString() const = 0; +public: + using TPtr = std::shared_ptr; + + virtual ~ISnapshotSchema() {} + virtual std::shared_ptr GetColumnLoaderOptional(const ui32 columnId) const = 0; + std::shared_ptr GetColumnLoaderVerified(const ui32 columnId) const; + std::shared_ptr GetColumnLoaderOptional(const std::string& columnName) const; + std::shared_ptr GetColumnLoaderVerified(const std::string& columnName) const; + + virtual TColumnSaver GetColumnSaver(const ui32 columnId) const = 0; + TColumnSaver GetColumnSaver(const TString& columnName) const { + return 
GetColumnSaver(GetColumnId(columnName)); + } + TColumnSaver GetColumnSaver(const std::string& columnName) const { + return GetColumnSaver(TString(columnName.data(), columnName.size())); + } + + virtual std::optional GetColumnIdOptional(const std::string& columnName) const = 0; + virtual int GetFieldIndex(const ui32 columnId) const = 0; + + ui32 GetColumnId(const std::string& columnName) const; + std::shared_ptr GetFieldByIndex(const int index) const; + std::shared_ptr GetFieldByColumnIdOptional(const ui32 columnId) const; + std::shared_ptr GetFieldByColumnIdVerified(const ui32 columnId) const; + + TString DebugString() const { + return DoDebugString(); + } + virtual const std::shared_ptr& GetSchema() const = 0; + virtual const TIndexInfo& GetIndexInfo() const = 0; + virtual const TSnapshot& GetSnapshot() const = 0; + virtual ui64 GetVersion() const = 0; + virtual ui32 GetColumnsCount() const = 0; + + std::set GetPkColumnsIds() const; + + std::shared_ptr NormalizeBatch(const ISnapshotSchema& dataSchema, const std::shared_ptr batch) const; + std::shared_ptr PrepareForInsert(const TString& data, const std::shared_ptr& dataSchema) const; +}; + +} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/scheme/versions/filtered_scheme.cpp b/ydb/core/tx/columnshard/engines/scheme/versions/filtered_scheme.cpp new file mode 100644 index 000000000000..8832e7eb0ec8 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/scheme/versions/filtered_scheme.cpp @@ -0,0 +1,94 @@ +#include "filtered_scheme.h" +#include + + +namespace NKikimr::NOlap { + +TFilteredSnapshotSchema::TFilteredSnapshotSchema(ISnapshotSchema::TPtr originalSnapshot, const std::vector& columnIds) + : TFilteredSnapshotSchema(originalSnapshot, std::set(columnIds.begin(), columnIds.end())) +{} + +TFilteredSnapshotSchema::TFilteredSnapshotSchema(ISnapshotSchema::TPtr originalSnapshot, const std::set& columnIds) + : OriginalSnapshot(originalSnapshot) + , ColumnIds(columnIds) +{ + std::vector> 
schemaFields; + for (auto&& i : OriginalSnapshot->GetSchema()->fields()) { + if (!ColumnIds.contains(OriginalSnapshot->GetIndexInfo().GetColumnId(i->name()))) { + continue; + } + schemaFields.emplace_back(i); + } + Schema = std::make_shared(schemaFields); +} + +TFilteredSnapshotSchema::TFilteredSnapshotSchema(ISnapshotSchema::TPtr originalSnapshot, const std::set& columnNames) + : OriginalSnapshot(originalSnapshot) { + for (auto&& i : columnNames) { + ColumnIds.emplace(OriginalSnapshot->GetColumnId(i)); + } + std::vector> schemaFields; + for (auto&& i : OriginalSnapshot->GetSchema()->fields()) { + if (!columnNames.contains(i->name())) { + continue; + } + schemaFields.emplace_back(i); + } + Schema = std::make_shared(schemaFields); +} + +TColumnSaver TFilteredSnapshotSchema::GetColumnSaver(const ui32 columnId) const { + Y_ABORT_UNLESS(ColumnIds.contains(columnId)); + return OriginalSnapshot->GetColumnSaver(columnId); +} + +std::shared_ptr TFilteredSnapshotSchema::GetColumnLoaderOptional(const ui32 columnId) const { + Y_ABORT_UNLESS(ColumnIds.contains(columnId)); + return OriginalSnapshot->GetColumnLoaderOptional(columnId); +} + +std::optional TFilteredSnapshotSchema::GetColumnIdOptional(const std::string& columnName) const { + return OriginalSnapshot->GetColumnIdOptional(columnName); +} + +int TFilteredSnapshotSchema::GetFieldIndex(const ui32 columnId) const { + if (!ColumnIds.contains(columnId)) { + return -1; + } + TString columnName = OriginalSnapshot->GetIndexInfo().GetColumnName(columnId, false); + if (!columnName) { + return -1; + } + std::string name(columnName.data(), columnName.size()); + return Schema->GetFieldIndex(name); +} + +const std::shared_ptr& TFilteredSnapshotSchema::GetSchema() const { + return Schema; +} + +const TIndexInfo& TFilteredSnapshotSchema::GetIndexInfo() const { + return OriginalSnapshot->GetIndexInfo(); +} + +const TSnapshot& TFilteredSnapshotSchema::GetSnapshot() const { + return OriginalSnapshot->GetSnapshot(); +} + +ui32 
TFilteredSnapshotSchema::GetColumnsCount() const { + return Schema->num_fields(); +} + +ui64 TFilteredSnapshotSchema::GetVersion() const { + return OriginalSnapshot->GetIndexInfo().GetVersion(); +} + +TString TFilteredSnapshotSchema::DoDebugString() const { + return TStringBuilder() << "(" + << "original=" << OriginalSnapshot->DebugString() << ";" + << "column_ids=[" << JoinSeq(",", ColumnIds) << "];" + << ")" + ; +} + +} diff --git a/ydb/core/tx/columnshard/engines/scheme/versions/filtered_scheme.h b/ydb/core/tx/columnshard/engines/scheme/versions/filtered_scheme.h new file mode 100644 index 000000000000..e9fa1b41b7c2 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/scheme/versions/filtered_scheme.h @@ -0,0 +1,32 @@ +#pragma once + +#include "abstract_scheme.h" + +#include + +namespace NKikimr::NOlap { + +class TFilteredSnapshotSchema: public ISnapshotSchema { + ISnapshotSchema::TPtr OriginalSnapshot; + std::shared_ptr Schema; + std::set ColumnIds; +protected: + virtual TString DoDebugString() const override; +public: + TFilteredSnapshotSchema(ISnapshotSchema::TPtr originalSnapshot, const std::vector& columnIds); + TFilteredSnapshotSchema(ISnapshotSchema::TPtr originalSnapshot, const std::set& columnIds); + TFilteredSnapshotSchema(ISnapshotSchema::TPtr originalSnapshot, const std::set& columnNames); + + TColumnSaver GetColumnSaver(const ui32 columnId) const override; + std::shared_ptr GetColumnLoaderOptional(const ui32 columnId) const override; + std::optional GetColumnIdOptional(const std::string& columnName) const override; + int GetFieldIndex(const ui32 columnId) const override; + + const std::shared_ptr& GetSchema() const override; + const TIndexInfo& GetIndexInfo() const override; + const TSnapshot& GetSnapshot() const override; + ui32 GetColumnsCount() const override; + ui64 GetVersion() const override; +}; + +} diff --git a/ydb/core/tx/columnshard/engines/scheme/versions/snapshot_scheme.cpp 
b/ydb/core/tx/columnshard/engines/scheme/versions/snapshot_scheme.cpp new file mode 100644 index 000000000000..1fe6820cf547 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/scheme/versions/snapshot_scheme.cpp @@ -0,0 +1,53 @@ +#include "snapshot_scheme.h" + +namespace NKikimr::NOlap { + +TSnapshotSchema::TSnapshotSchema(TIndexInfo&& indexInfo, const TSnapshot& snapshot) + : IndexInfo(std::move(indexInfo)) + , Schema(IndexInfo.ArrowSchemaWithSpecials()) + , Snapshot(snapshot) +{ +} + +TColumnSaver TSnapshotSchema::GetColumnSaver(const ui32 columnId) const { + return IndexInfo.GetColumnSaver(columnId); +} + +std::shared_ptr TSnapshotSchema::GetColumnLoaderOptional(const ui32 columnId) const { + return IndexInfo.GetColumnLoaderOptional(columnId); +} + +std::optional TSnapshotSchema::GetColumnIdOptional(const std::string& columnName) const { + return IndexInfo.GetColumnIdOptional(columnName); +} + +int TSnapshotSchema::GetFieldIndex(const ui32 columnId) const { + const TString& columnName = IndexInfo.GetColumnName(columnId, false); + if (!columnName) { + return -1; + } + std::string name(columnName.data(), columnName.size()); + return Schema->GetFieldIndex(name); +} + +const std::shared_ptr& TSnapshotSchema::GetSchema() const { + return Schema; +} + +const TIndexInfo& TSnapshotSchema::GetIndexInfo() const { + return IndexInfo; +} + +const TSnapshot& TSnapshotSchema::GetSnapshot() const { + return Snapshot; +} + +ui32 TSnapshotSchema::GetColumnsCount() const { + return Schema->num_fields(); +} + +ui64 TSnapshotSchema::GetVersion() const { + return IndexInfo.GetVersion(); +} + +} diff --git a/ydb/core/tx/columnshard/engines/scheme/versions/snapshot_scheme.h b/ydb/core/tx/columnshard/engines/scheme/versions/snapshot_scheme.h new file mode 100644 index 000000000000..539d8f99a02c --- /dev/null +++ b/ydb/core/tx/columnshard/engines/scheme/versions/snapshot_scheme.h @@ -0,0 +1,38 @@ +#pragma once + +#include "abstract_scheme.h" + +#include + +namespace NKikimr::NOlap { + 
+class TSnapshotSchema: public ISnapshotSchema { +private: + TIndexInfo IndexInfo; + std::shared_ptr Schema; + TSnapshot Snapshot; +protected: + virtual TString DoDebugString() const override { + return TStringBuilder() << "(" + "schema=" << Schema->ToString() << ";" << + "snapshot=" << Snapshot.DebugString() << ";" << + "index_info=" << IndexInfo.DebugString() << ";" << + ")" + ; + } +public: + TSnapshotSchema(TIndexInfo&& indexInfo, const TSnapshot& snapshot); + + TColumnSaver GetColumnSaver(const ui32 columnId) const override; + std::shared_ptr GetColumnLoaderOptional(const ui32 columnId) const override; + std::optional GetColumnIdOptional(const std::string& columnName) const override; + int GetFieldIndex(const ui32 columnId) const override; + + const std::shared_ptr& GetSchema() const override; + const TIndexInfo& GetIndexInfo() const override; + const TSnapshot& GetSnapshot() const override; + ui32 GetColumnsCount() const override; + ui64 GetVersion() const override; +}; + +} diff --git a/ydb/core/tx/columnshard/engines/scheme/versions/versioned_index.cpp b/ydb/core/tx/columnshard/engines/scheme/versions/versioned_index.cpp new file mode 100644 index 000000000000..e7cf6f30ed36 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/scheme/versions/versioned_index.cpp @@ -0,0 +1,31 @@ +#include "versioned_index.h" +#include "snapshot_scheme.h" + +#include + +namespace NKikimr::NOlap { + +void TVersionedIndex::AddIndex(const TSnapshot& snapshot, TIndexInfo&& indexInfo) { + if (Snapshots.empty()) { + PrimaryKey = indexInfo.GetPrimaryKey(); + } else { + Y_ABORT_UNLESS(PrimaryKey->Equals(indexInfo.GetPrimaryKey())); + } + + const bool needActualization = indexInfo.GetSchemeNeedActualization(); + auto newVersion = indexInfo.GetVersion(); + auto itVersion = SnapshotByVersion.emplace(newVersion, std::make_shared(std::move(indexInfo), snapshot)); + if (!itVersion.second) { + AFL_INFO(NKikimrServices::TX_COLUMNSHARD)("message", "Skip registered version")("version", 
LastSchemaVersion); + } else if (needActualization) { + if (!SchemeVersionForActualization || *SchemeVersionForActualization < newVersion) { + SchemeVersionForActualization = newVersion; + SchemeForActualization = itVersion.first->second; + } + } + auto itSnap = Snapshots.emplace(snapshot, itVersion.first->second); + Y_ABORT_UNLESS(itSnap.second); + LastSchemaVersion = std::max(newVersion, LastSchemaVersion); +} + +} diff --git a/ydb/core/tx/columnshard/engines/scheme/versions/versioned_index.h b/ydb/core/tx/columnshard/engines/scheme/versions/versioned_index.h new file mode 100644 index 000000000000..d83d338350d7 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/scheme/versions/versioned_index.h @@ -0,0 +1,68 @@ +#pragma once +#include "abstract_scheme.h" + +namespace NKikimr::NOlap { + +class TVersionedIndex { + std::map Snapshots; + std::shared_ptr PrimaryKey; + std::map SnapshotByVersion; + ui64 LastSchemaVersion = 0; + std::optional SchemeVersionForActualization; + ISnapshotSchema::TPtr SchemeForActualization; +public: + ISnapshotSchema::TPtr GetLastCriticalSchema() const { + return SchemeForActualization; + } + + ISnapshotSchema::TPtr GetLastCriticalSchemaDef(const ISnapshotSchema::TPtr defaultSchema) const { + auto result = GetLastCriticalSchema(); + return result ? result : defaultSchema; + } + + TString DebugString() const { + TStringBuilder sb; + for (auto&& i : Snapshots) { + sb << i.first << ":" << i.second->DebugString() << ";"; + } + return sb; + } + + ISnapshotSchema::TPtr GetSchema(const ui64 version) const { + auto it = SnapshotByVersion.find(version); + return it == SnapshotByVersion.end() ? 
nullptr : it->second; + } + + ISnapshotSchema::TPtr GetSchemaVerified(const ui64 version) const { + auto it = SnapshotByVersion.find(version); + Y_ABORT_UNLESS(it != SnapshotByVersion.end(), "no schema for version %lu", version); + return it->second; + } + + ISnapshotSchema::TPtr GetSchema(const TSnapshot& version) const { + for (auto it = Snapshots.rbegin(); it != Snapshots.rend(); ++it) { + if (it->first <= version) { + return it->second; + } + } + Y_ABORT_UNLESS(!Snapshots.empty()); + Y_ABORT_UNLESS(version.IsZero()); + return Snapshots.begin()->second; // For old compaction logic compatibility + } + + ISnapshotSchema::TPtr GetLastSchema() const { + Y_ABORT_UNLESS(!Snapshots.empty()); + return Snapshots.rbegin()->second; + } + + bool IsEmpty() const { + return Snapshots.empty(); + } + + const std::shared_ptr& GetPrimaryKey() const noexcept { + return PrimaryKey; + } + + void AddIndex(const TSnapshot& snapshot, TIndexInfo&& indexInfo); +}; +} diff --git a/ydb/core/tx/columnshard/engines/scheme/versions/ya.make b/ydb/core/tx/columnshard/engines/scheme/versions/ya.make new file mode 100644 index 000000000000..63dc44a74899 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/scheme/versions/ya.make @@ -0,0 +1,14 @@ +LIBRARY() + +SRCS( + abstract_scheme.cpp + snapshot_scheme.cpp + filtered_scheme.cpp + versioned_index.cpp +) + +PEERDIR( + ydb/core/tx/columnshard/engines/scheme/abstract +) + +END() diff --git a/ydb/core/tx/columnshard/engines/scheme/ya.make b/ydb/core/tx/columnshard/engines/scheme/ya.make index 2067cdd542b7..f2b04cd56c89 100644 --- a/ydb/core/tx/columnshard/engines/scheme/ya.make +++ b/ydb/core/tx/columnshard/engines/scheme/ya.make @@ -15,6 +15,13 @@ PEERDIR( ydb/library/actors/core ydb/core/tx/columnshard/engines/scheme/indexes + ydb/core/tx/columnshard/engines/scheme/statistics + ydb/core/tx/columnshard/engines/scheme/abstract + ydb/core/tx/columnshard/engines/scheme/versions + ydb/core/tx/columnshard/engines/scheme/tiering + 
ydb/core/tx/columnshard/engines/scheme/column
+    ydb/core/tx/columnshard/blobs_action/abstract
+    ydb/core/tx/columnshard/engines/changes/compaction
 )
 YQL_LAST_ABI_VERSION()
diff --git a/ydb/core/tx/columnshard/engines/storage/actualizer/abstract/abstract.cpp b/ydb/core/tx/columnshard/engines/storage/actualizer/abstract/abstract.cpp
new file mode 100644
index 000000000000..d193e92e3e1a
--- /dev/null
+++ b/ydb/core/tx/columnshard/engines/storage/actualizer/abstract/abstract.cpp
@@ -0,0 +1,5 @@
+#include "abstract.h"
+
+namespace NKikimr::NOlap::NActualizer {
+
+}
diff --git a/ydb/core/tx/columnshard/engines/storage/actualizer/abstract/abstract.h b/ydb/core/tx/columnshard/engines/storage/actualizer/abstract/abstract.h
new file mode 100644
index 000000000000..56db4cf2fa4f
--- /dev/null
+++ b/ydb/core/tx/columnshard/engines/storage/actualizer/abstract/abstract.h
@@ -0,0 +1,30 @@
+#pragma once
+#include "context.h"
+
+#include
+
+namespace NKikimr::NOlap::NActualizer {
+
+class IActualizer { // base interface: public non-virtual entry points forward to the protected Do* hooks
+protected:
+    virtual void DoAddPortion(const TPortionInfo& info, const TAddExternalContext& context) = 0; // called only for non-null portions without a remove snapshot
+    virtual void DoRemovePortion(const ui64 portionId) = 0;
+    virtual void DoExtractTasks(TTieringProcessContext& tasksContext, const TExternalTasksContext& externalContext, TInternalTasksContext& internalContext) = 0;
+public:
+    virtual ~IActualizer() = default;
+    void ExtractTasks(TTieringProcessContext& tasksContext, const TExternalTasksContext& externalContext, TInternalTasksContext& internalContext) { // plain forwarder
+        return DoExtractTasks(tasksContext, externalContext, internalContext);
+    }
+    void AddPortion(const std::shared_ptr& info, const TAddExternalContext& context) { // verifies info is non-null; portions with a remove snapshot are ignored
+        AFL_VERIFY(info);
+        if (info->HasRemoveSnapshot()) {
+            return;
+        }
+        return DoAddPortion(*info, context);
+    }
+    void RemovePortion(const ui64 portionId) { // plain forwarder
+        return DoRemovePortion(portionId);
+    }
+};
+
+}
\ No newline at end of file
diff --git a/ydb/core/tx/columnshard/engines/storage/actualizer/abstract/context.cpp
b/ydb/core/tx/columnshard/engines/storage/actualizer/abstract/context.cpp
new file mode 100644
index 000000000000..6ef73de8ceb0
--- /dev/null
+++ b/ydb/core/tx/columnshard/engines/storage/actualizer/abstract/context.cpp
@@ -0,0 +1,5 @@
+#include "context.h"
+
+namespace NKikimr::NOlap::NActualizer {
+
+}
diff --git a/ydb/core/tx/columnshard/engines/storage/actualizer/abstract/context.h b/ydb/core/tx/columnshard/engines/storage/actualizer/abstract/context.h
new file mode 100644
index 000000000000..3e50ee118d43
--- /dev/null
+++ b/ydb/core/tx/columnshard/engines/storage/actualizer/abstract/context.h
@@ -0,0 +1,59 @@
+#pragma once
+#include
+
+#include
+
+#include
+
+namespace NKikimr::NOlap {
+class TPortionInfo;
+}
+
+namespace NKikimr::NOlap::NActualizer {
+
+class TTieringProcessContext;
+
+class TAddExternalContext { // inputs passed along when portions are added to an actualizer
+private:
+    YDB_READONLY_DEF(TInstant, Now);
+    YDB_ACCESSOR(bool, PortionExclusiveGuarantee, true); // defaults to true
+    const THashMap>& Portions; // stored by reference: the caller's map must outlive this context
+public:
+    TAddExternalContext(const TInstant now, const THashMap>& portions)
+        : Now(now)
+        , Portions(portions)
+    {
+
+    }
+
+    const THashMap>& GetPortions() const {
+        return Portions;
+    }
+};
+
+class TExternalTasksContext { // read-only access to the portions map during task extraction
+private:
+    const THashMap>& Portions; // stored by reference: the caller's map must outlive this context
+public:
+    const THashMap>& GetPortions() const {
+        return Portions;
+    }
+
+    const std::shared_ptr& GetPortionVerified(const ui64 portionId) const { // AFL_VERIFY fails when portionId is not in the map
+        auto it = Portions.find(portionId);
+        AFL_VERIFY(it != Portions.end());
+        return it->second;
+    }
+
+    TExternalTasksContext(const THashMap>& portions)
+        : Portions(portions)
+    {
+
+    }
+};
+
+class TInternalTasksContext { // currently has no members
+public:
+};
+
+}
\ No newline at end of file
diff --git a/ydb/core/tx/columnshard/engines/storage/actualizer/abstract/ya.make b/ydb/core/tx/columnshard/engines/storage/actualizer/abstract/ya.make
new file mode 100644
index 000000000000..90979469658b
--- /dev/null
+++ b/ydb/core/tx/columnshard/engines/storage/actualizer/abstract/ya.make
@@ -0,0 +1,12 @@
+LIBRARY()
+
+SRCS(
+    abstract.cpp
+
context.cpp
+)
+
+PEERDIR(
+    ydb/core/tx/columnshard/engines/scheme/versions
+)
+
+END()
diff --git a/ydb/core/tx/columnshard/engines/storage/actualizer/common/address.cpp b/ydb/core/tx/columnshard/engines/storage/actualizer/common/address.cpp
new file mode 100644
index 000000000000..6fae8d9c9b73
--- /dev/null
+++ b/ydb/core/tx/columnshard/engines/storage/actualizer/common/address.cpp
@@ -0,0 +1,33 @@
+#include "address.h"
+
+#include
+
+#include
+
+#include
+#include
+#include
+
+namespace NKikimr::NOlap::NActualizer {
+
+TRWAddress::TRWAddress(std::set&& readStorages, std::set&& writeStorages): ReadStorages(std::move(readStorages)) // takes both sets by move, validates them and precomputes Hash
+, WriteStorages(std::move(writeStorages)) {
+    AFL_VERIFY(!ReadStorages.contains("")); // empty storage names are rejected
+    AFL_VERIFY(!WriteStorages.contains(""));
+    if (WriteStorages.contains(NTiering::NCommon::DeleteTierName)) {
+        AFL_VERIFY(WriteStorages.size() == 1); // the delete tier must be the only write target
+    }
+    Hash = 0;
+    for (auto&& i : ReadStorages) { // fold every read storage name, then every write storage name, into Hash
+        Hash = CombineHashes(Hash, CityHash64(i.data(), i.size()));
+    }
+    for (auto&& i : WriteStorages) {
+        Hash = CombineHashes(Hash, CityHash64(i.data(), i.size()));
+    }
+}
+
+TString TRWAddress::DebugString() const { // "R:<read ids>;W:<write ids>", ids comma-joined
+    return "R:" + JoinSeq(",", ReadStorages) + ";W:" + JoinSeq(",", WriteStorages);
+}
+
+}
\ No newline at end of file
diff --git a/ydb/core/tx/columnshard/engines/storage/actualizer/common/address.h b/ydb/core/tx/columnshard/engines/storage/actualizer/common/address.h
new file mode 100644
index 000000000000..31a74b7cc019
--- /dev/null
+++ b/ydb/core/tx/columnshard/engines/storage/actualizer/common/address.h
@@ -0,0 +1,34 @@
+#pragma once
+#include
+#include
+
+namespace NKikimr::NOlap::NActualizer {
+
+class TRWAddress { // pair of storage-id sets: where data is read from and where it is written to
+private:
+    std::set ReadStorages;
+    std::set WriteStorages;
+    ui64 Hash = 0; // computed once in the constructor
+public:
+    bool WriteIs(const TString& storageId) const { // true only when storageId is the single write storage
+        return WriteStorages.size() == 1 && WriteStorages.contains(storageId);
+    }
+
+    bool ReadIs(const TString& storageId) const { // true only when storageId is the single read storage
+        return ReadStorages.size() == 1 && ReadStorages.contains(storageId);
+    }
+
+    TString DebugString() const;
+
+    TRWAddress(std::set&& readStorages, std::set&& writeStorages);
+
+    bool operator==(const TRWAddress& item) const { // compares the sets themselves, not the cached Hash
+        return ReadStorages == item.ReadStorages && WriteStorages == item.WriteStorages;
+    }
+
+    operator size_t() const { // exposes the cached Hash; NOTE(review): presumably what lets hash containers key on TRWAddress - confirm
+        return Hash;
+    }
+};
+
+}
\ No newline at end of file
diff --git a/ydb/core/tx/columnshard/engines/storage/actualizer/common/ya.make b/ydb/core/tx/columnshard/engines/storage/actualizer/common/ya.make
new file mode 100644
index 000000000000..ae034445d0c4
--- /dev/null
+++ b/ydb/core/tx/columnshard/engines/storage/actualizer/common/ya.make
@@ -0,0 +1,11 @@
+LIBRARY()
+
+SRCS(
+    address.cpp
+)
+
+PEERDIR(
+    ydb/library/actors/core
+)
+
+END()
diff --git a/ydb/core/tx/columnshard/engines/storage/actualizer/counters/counters.cpp b/ydb/core/tx/columnshard/engines/storage/actualizer/counters/counters.cpp
new file mode 100644
index 000000000000..f6e4cca64311
--- /dev/null
+++ b/ydb/core/tx/columnshard/engines/storage/actualizer/counters/counters.cpp
@@ -0,0 +1,5 @@
+#include "counters.h"
+
+namespace NKikimr::NOlap::NActualizer {
+
+}
diff --git a/ydb/core/tx/columnshard/engines/storage/actualizer/counters/counters.h b/ydb/core/tx/columnshard/engines/storage/actualizer/counters/counters.h
new file mode 100644
index 000000000000..7bf8aa895f69
--- /dev/null
+++ b/ydb/core/tx/columnshard/engines/storage/actualizer/counters/counters.h
@@ -0,0 +1,109 @@
+#pragma once
+#include
+#include
+
+namespace NKikimr::NOlap::NActualizer {
+
+class TPortionCategoryCounterAgents: public NColumnShard::TCommonCountersOwner { // three aggregation agents created under the "category"=categoryName label
+private:
+    using TBase = NColumnShard::TCommonCountersOwner;
+public:
+    const std::shared_ptr RecordsCount;
+    const std::shared_ptr Count;
+    const std::shared_ptr Bytes;
+    TPortionCategoryCounterAgents(NColumnShard::TCommonCountersOwner& base, const TString& categoryName)
+        : TBase(base, "category", categoryName)
+        , RecordsCount(TBase::GetValueAutoAggregations("ByGranule/Portions/RecordsCount"))
+        , Count(TBase::GetValueAutoAggregations("ByGranule/Portions/Count"))
+        , Bytes(TBase::GetValueAutoAggregations("ByGranule/Portions/Bytes"))
+    {
+    }
+};
+
+class TPortionCategoryCounters { // per-owner clients of the three agents above
+private:
+    std::shared_ptr RecordsCount;
+    std::shared_ptr Count;
+    std::shared_ptr Bytes;
+public:
+    TPortionCategoryCounters(TPortionCategoryCounterAgents& agents)
+    {
+        RecordsCount = agents.RecordsCount->GetClient();
+        Count = agents.Count->GetClient();
+        Bytes = agents.Bytes->GetClient();
+    }
+
+    void AddPortion(const std::shared_ptr& p) { // adds the portion's rows, one portion and its blob bytes
+        RecordsCount->Add(p->NumRows());
+        Count->Add(1);
+        Bytes->Add(p->GetTotalBlobBytes());
+    }
+
+    void RemovePortion(const std::shared_ptr& p) { // exact inverse of AddPortion
+        RecordsCount->Remove(p->NumRows());
+        Count->Remove(1);
+        Bytes->Remove(p->GetTotalBlobBytes());
+    }
+};
+
+class TGlobalCounters: public NColumnShard::TCommonCountersOwner { // owner of the five "Actualizer" category agents, accessed through Singleton()
+private:
+    using TBase = NColumnShard::TCommonCountersOwner;
+    std::shared_ptr PortionsWaitingEviction;
+    std::shared_ptr PortionsWaitingDelete;
+    std::shared_ptr PortionsLatenessEviction;
+    std::shared_ptr PortionsLatenessDelete;
+    std::shared_ptr PortionsToSyncSchema;
+public:
+
+    TGlobalCounters()
+        : TBase("Actualizer")
+    {
+        PortionsWaitingEviction = std::make_shared(*this, "eviction_waiting");
+        PortionsWaitingDelete = std::make_shared(*this, "delete_waiting");
+        PortionsLatenessEviction = std::make_shared(*this, "eviction_lateness");
+        PortionsLatenessDelete = std::make_shared(*this, "delete_lateness");
+        PortionsToSyncSchema = std::make_shared(*this, "sync_schema");
+    }
+
+    static std::shared_ptr BuildPortionsWaitingEvictionAggregation() { // each Build* makes a new object from the matching singleton agent
+        return std::make_shared(*Singleton()->PortionsWaitingEviction);
+    }
+
+    static std::shared_ptr BuildPortionsWaitingDeleteAggregation() {
+        return std::make_shared(*Singleton()->PortionsWaitingDelete);
+    }
+
+    static std::shared_ptr BuildPortionsLatenessEvictionAggregation() {
+        return std::make_shared(*Singleton()->PortionsLatenessEviction);
+    }
+
+    static std::shared_ptr BuildPortionsLatenessDeleteAggregation() {
+        return std::make_shared(*Singleton()->PortionsLatenessDelete);
+    }
+
+    static std::shared_ptr BuildPortionsToSyncSchemaAggregation() {
+        return std::make_shared(*Singleton()->PortionsToSyncSchema);
+    }
+};
+
+class TCounters { // bundle of five counters, each built by the matching TGlobalCounters::Build* helper
+public:
+    const std::shared_ptr PortionsWaitingEviction;
+    const std::shared_ptr PortionsWaitingDelete;
+    const std::shared_ptr PortionsLatenessEviction;
+    const std::shared_ptr PortionsLatenessDelete;
+    const std::shared_ptr PortionsToSyncSchema;
+
+    TCounters()
+        : PortionsWaitingEviction(TGlobalCounters::BuildPortionsWaitingEvictionAggregation())
+        , PortionsWaitingDelete(TGlobalCounters::BuildPortionsWaitingDeleteAggregation())
+        , PortionsLatenessEviction(TGlobalCounters::BuildPortionsLatenessEvictionAggregation())
+        , PortionsLatenessDelete(TGlobalCounters::BuildPortionsLatenessDeleteAggregation())
+        , PortionsToSyncSchema(TGlobalCounters::BuildPortionsToSyncSchemaAggregation())
+    {
+    }
+
+};
+
+}
diff --git a/ydb/core/tx/columnshard/engines/storage/actualizer/counters/ya.make b/ydb/core/tx/columnshard/engines/storage/actualizer/counters/ya.make
new file mode 100644
index 000000000000..d73b370747be
--- /dev/null
+++ b/ydb/core/tx/columnshard/engines/storage/actualizer/counters/ya.make
@@ -0,0 +1,13 @@
+LIBRARY()
+
+SRCS(
+    counters.cpp
+)
+
+PEERDIR(
+    ydb/library/actors/core
+    ydb/core/tx/columnshard/engines/portions
+    ydb/core/tx/columnshard/counters/common
+)
+
+END()
diff --git a/ydb/core/tx/columnshard/engines/storage/actualizer/index/index.cpp b/ydb/core/tx/columnshard/engines/storage/actualizer/index/index.cpp
new file mode 100644
index 000000000000..91805b0ef283
--- /dev/null
+++ b/ydb/core/tx/columnshard/engines/storage/actualizer/index/index.cpp
@@ -0,0 +1,54 @@
+#include "index.h"
+#include
+#include
+#include
+
+namespace NKikimr::NOlap::NActualizer {
+
+void
TGranuleActualizationIndex::ExtractActualizationTasks(TTieringProcessContext& tasksContext, const NActualizer::TExternalTasksContext& externalContext) const { // asks every registered actualizer for tasks, sharing one internal context
+    TInternalTasksContext internalContext;
+    for (auto&& i : Actualizers) {
+        i->ExtractTasks(tasksContext, externalContext, internalContext);
+    }
+}
+
+void TGranuleActualizationIndex::AddPortion(const std::shared_ptr& portion, const TAddExternalContext& context) { // offers the portion to every registered actualizer
+    for (auto&& i : Actualizers) {
+        i->AddPortion(portion, context);
+    }
+}
+
+void TGranuleActualizationIndex::RemovePortion(const std::shared_ptr& portion) { // removes the portion from every registered actualizer by its id
+    for (auto&& i : Actualizers) {
+        i->RemovePortion(portion->GetPortionId());
+    }
+}
+
+void TGranuleActualizationIndex::RefreshTiering(const std::optional& info, const TAddExternalContext& context) { // requires Start() to have run: TieringActualizer is verified non-null
+    AFL_VERIFY(TieringActualizer);
+    TieringActualizer->Refresh(info, context);
+    NYDBTest::TControllers::GetColumnShardController()->OnActualizationRefreshTiering(); // notifies the test controller hook
+}
+
+void TGranuleActualizationIndex::RefreshScheme(const TAddExternalContext& context) { // requires Start() to have run: SchemeActualizer is verified non-null
+    AFL_VERIFY(SchemeActualizer);
+    SchemeActualizer->Refresh(context);
+    NYDBTest::TControllers::GetColumnShardController()->OnActualizationRefreshScheme(); // notifies the test controller hook
+}
+
+TGranuleActualizationIndex::TGranuleActualizationIndex(const ui64 pathId, const TVersionedIndex& versionedIndex) // stores the arguments only; actualizers are created later in Start()
+    : PathId(pathId)
+    , VersionedIndex(versionedIndex)
+{
+    Y_UNUSED(PathId);
+}
+
+void TGranuleActualizationIndex::Start() { // creates the tiering and scheme actualizers; must be called at most once (verifies the list is empty)
+    AFL_VERIFY(Actualizers.empty());
+    TieringActualizer = std::make_shared(PathId, VersionedIndex);
+    SchemeActualizer = std::make_shared(PathId, VersionedIndex);
+    Actualizers.emplace_back(TieringActualizer); // registration order: tiering first, then scheme
+    Actualizers.emplace_back(SchemeActualizer);
+}
+
+}
diff --git a/ydb/core/tx/columnshard/engines/storage/actualizer/index/index.h b/ydb/core/tx/columnshard/engines/storage/actualizer/index/index.h
new file mode 100644
index 000000000000..a67fac3a5cdb
--- /dev/null
+++ b/ydb/core/tx/columnshard/engines/storage/actualizer/index/index.h
@@ -0,0
+1,37 @@ +#pragma once +#include +#include + +namespace NKikimr::NOlap { +class TVersionedIndex; +class TTiering; +} + +namespace NKikimr::NOlap::NActualizer { +class TTieringActualizer; +class TSchemeActualizer; + +class TGranuleActualizationIndex { +private: + TCounters Counters; + std::vector> Actualizers; + + std::shared_ptr TieringActualizer; + std::shared_ptr SchemeActualizer; + + const ui64 PathId; + const TVersionedIndex& VersionedIndex; +public: + void Start(); + TGranuleActualizationIndex(const ui64 pathId, const TVersionedIndex& versionedIndex); + + void ExtractActualizationTasks(TTieringProcessContext& tasksContext, const NActualizer::TExternalTasksContext& externalContext) const; + + void RefreshTiering(const std::optional& info, const TAddExternalContext& context); + void RefreshScheme(const TAddExternalContext& context); + + void AddPortion(const std::shared_ptr& portion, const TAddExternalContext& context); + void RemovePortion(const std::shared_ptr& portion); +}; + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/storage/actualizer/index/ya.make b/ydb/core/tx/columnshard/engines/storage/actualizer/index/ya.make new file mode 100644 index 000000000000..8308be890dca --- /dev/null +++ b/ydb/core/tx/columnshard/engines/storage/actualizer/index/ya.make @@ -0,0 +1,11 @@ +LIBRARY() + +SRCS( + index.cpp +) + +PEERDIR( + ydb/core/tx/columnshard/engines/scheme/versions +) + +END() diff --git a/ydb/core/tx/columnshard/engines/storage/actualizer/scheme/counters.cpp b/ydb/core/tx/columnshard/engines/storage/actualizer/scheme/counters.cpp new file mode 100644 index 000000000000..146a8700def1 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/storage/actualizer/scheme/counters.cpp @@ -0,0 +1,5 @@ +#include "counters.h" + +namespace NKikimr::NOlap::NStorageOptimizer::NBuckets { + +} diff --git a/ydb/core/tx/columnshard/engines/storage/actualizer/scheme/counters.h b/ydb/core/tx/columnshard/engines/storage/actualizer/scheme/counters.h new 
file mode 100644
index 000000000000..8f5ab2ff4311
--- /dev/null
+++ b/ydb/core/tx/columnshard/engines/storage/actualizer/scheme/counters.h
@@ -0,0 +1,47 @@
+#pragma once
+#include
+#include
+#include
+#include
+#include
+
+namespace NKikimr::NOlap::NActualizer {
+
+class TSchemeGlobalCounters: public NColumnShard::TCommonCountersOwner { // owner of the two "SchemeActualizer" queue-size aggregations, accessed through Singleton()
+private:
+    using TBase = NColumnShard::TCommonCountersOwner;
+
+    std::shared_ptr QueueSizeInternalWrite;
+    std::shared_ptr QueueSizeExternalWrite;
+public:
+    TSchemeGlobalCounters()
+        : TBase("SchemeActualizer")
+    {
+        QueueSizeExternalWrite = TBase::GetValueAutoAggregations("Granule/Scheme/Actualization/QueueSize/ExternalWrite");
+        QueueSizeInternalWrite = TBase::GetValueAutoAggregations("Granule/Scheme/Actualization/QueueSize/InternalWrite");
+    }
+
+    static std::shared_ptr BuildQueueSizeExternalWrite() { // new client of the singleton's aggregation
+        return Singleton()->QueueSizeExternalWrite->GetClient();
+    }
+
+    static std::shared_ptr BuildQueueSizeInternalWrite() { // new client of the singleton's aggregation
+        return Singleton()->QueueSizeInternalWrite->GetClient();
+    }
+
+};
+
+class TSchemeCounters { // per-actualizer clients of the two queue-size aggregations
+public:
+    const std::shared_ptr QueueSizeInternalWrite;
+    const std::shared_ptr QueueSizeExternalWrite;
+
+    TSchemeCounters()
+        : QueueSizeInternalWrite(TSchemeGlobalCounters::BuildQueueSizeInternalWrite())
+        , QueueSizeExternalWrite(TSchemeGlobalCounters::BuildQueueSizeExternalWrite())
+{
+    }
+
+};
+
+}
diff --git a/ydb/core/tx/columnshard/engines/storage/actualizer/scheme/scheme.cpp b/ydb/core/tx/columnshard/engines/storage/actualizer/scheme/scheme.cpp
new file mode 100644
index 000000000000..b2def23842d4
--- /dev/null
+++ b/ydb/core/tx/columnshard/engines/storage/actualizer/scheme/scheme.cpp
@@ -0,0 +1,117 @@
+#include "scheme.h"
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace NKikimr::NOlap::NActualizer {
+
+std::optional TSchemeActualizer::BuildActualizationInfo(const TPortionInfo& portion) const { // returns info only when the portion's schema version is older than TargetSchema's
+    AFL_VERIFY(TargetSchema); // callers must not ask before a target schema is known
+    const TString& currentTierName = portion.GetTierNameDef(IStoragesManager::DefaultStorageId);
+    auto portionSchema = portion.GetSchema(VersionedIndex);
+    if (portionSchema->GetVersion() < TargetSchema->GetVersion()) {
+        auto storagesWrite = TargetSchema->GetIndexInfo().GetUsedStorageIds(currentTierName); // write side: storages used by the target schema
+        auto storagesRead = portionSchema->GetIndexInfo().GetUsedStorageIds(currentTierName); // read side: storages used by the portion's current schema
+        TRWAddress address(std::move(storagesRead), std::move(storagesWrite));
+        return TFullActualizationInfo(std::move(address), TargetSchema);
+    }
+    return {};
+}
+
+void TSchemeActualizer::DoAddPortion(const TPortionInfo& info, const TAddExternalContext& addContext) { // tracks the portion if it needs a schema upgrade; no-op without a target schema
+    if (!TargetSchema) {
+        return;
+    }
+    if (!addContext.GetPortionExclusiveGuarantee()) { // without the guarantee, a repeated add of a known portion is silently ignored
+        if (PortionsInfo.contains(info.GetPortionId())) {
+            return;
+        }
+    } else {
+        AFL_VERIFY(!PortionsInfo.contains(info.GetPortionId())); // with the guarantee, a repeated add is a verify failure
+    }
+    auto actualizationInfo = BuildActualizationInfo(info);
+    if (!actualizationInfo) {
+        return;
+    }
+    NYDBTest::TControllers::GetColumnShardController()->AddPortionForActualizer(1);
+    AFL_VERIFY(PortionsToActualizeScheme[actualizationInfo->GetAddress()].emplace(info.GetPortionId()).second); // must use GetAddress() before ExtractFindId() moves the address out
+    AFL_VERIFY(PortionsInfo.emplace(info.GetPortionId(), actualizationInfo->ExtractFindId()).second);
+}
+
+void TSchemeActualizer::DoRemovePortion(const ui64 portionId) { // forgets the portion in both maps; unknown ids are ignored
+    auto it = PortionsInfo.find(portionId);
+    if (it == PortionsInfo.end()) {
+        return;
+    }
+    auto itAddress = PortionsToActualizeScheme.find(it->second.GetRWAddress());
+    AFL_VERIFY(itAddress != PortionsToActualizeScheme.end());
+    AFL_VERIFY(itAddress->second.erase(portionId));
+    NYDBTest::TControllers::GetColumnShardController()->AddPortionForActualizer(-1);
+    if (itAddress->second.empty()) { // drop address buckets that became empty
+        PortionsToActualizeScheme.erase(itAddress);
+    }
+    PortionsInfo.erase(it);
+}
+
+void TSchemeActualizer::DoExtractTasks(TTieringProcessContext& tasksContext, const TExternalTasksContext& externalContext, TInternalTasksContext& /*internalContext*/) { // hands tracked portions to tasksContext and refreshes the queue-size counters
+    THashSet portionsToRemove; // removal is deferred so the maps are not modified while being iterated
+    for (auto&& [address, portions] : PortionsToActualizeScheme) {
+        if (!tasksContext.IsRWAddressAvailable(address)) {
+            continue;
+        }
+        for (auto&& portionId : portions) {
+            auto portion = externalContext.GetPortionVerified(portionId);
+            if (!address.WriteIs(NBlobOperations::TGlobal::DefaultStorageId) && !address.WriteIs(NTiering::NCommon::DeleteTierName)) { // for other write targets only portions with the Optimized runtime feature are taken
+                if (!portion->HasRuntimeFeature(TPortionInfo::ERuntimeFeature::Optimized)) {
+                    continue;
+                }
+            }
+            auto info = BuildActualizationInfo(*portion);
+            AFL_VERIFY(info);
+            auto portionScheme = portion->GetSchema(VersionedIndex);
+            TPortionEvictionFeatures features(portionScheme, info->GetTargetScheme(), portion->GetTierNameDef(IStoragesManager::DefaultStorageId));
+            features.SetTargetTierName(portion->GetTierNameDef(IStoragesManager::DefaultStorageId)); // the target tier equals the current tier: only the schema changes
+
+            if (!tasksContext.AddPortion(*portion, std::move(features), {})) { // a rejected portion ends processing of this address bucket
+                break;
+            } else {
+                portionsToRemove.emplace(portion->GetPortionId());
+            }
+        }
+    }
+    for (auto&& i : portionsToRemove) {
+        RemovePortion(i);
+    }
+
+    ui64 waitQueueExternal = 0;
+    ui64 waitQueueInternal = 0;
+    for (auto&& i : PortionsToActualizeScheme) { // remaining portions, split by whether the single write target is the default storage
+        if (i.first.WriteIs(IStoragesManager::DefaultStorageId)) {
+            waitQueueInternal += i.second.size();
+        } else {
+            waitQueueExternal += i.second.size();
+        }
+    }
+    Counters.QueueSizeInternalWrite->SetValue(waitQueueInternal);
+    Counters.QueueSizeExternalWrite->SetValue(waitQueueExternal);
+
+}
+
+void TSchemeActualizer::Refresh(const TAddExternalContext& externalContext) { // re-reads the target schema and rebuilds the tracked set from all portions in the context
+    TargetSchema = VersionedIndex.GetLastCriticalSchema();
+    if (!TargetSchema) {
+        AFL_VERIFY(PortionsInfo.empty());
+    } else {
+        NYDBTest::TControllers::GetColumnShardController()->AddPortionForActualizer(-1 * PortionsInfo.size()); // NOTE(review): negating size() relies on unsigned wraparound and conversion at the call site - confirm the parameter type
+        PortionsInfo.clear();
+        PortionsToActualizeScheme.clear();
+        for (auto&& i : externalContext.GetPortions()) {
+            AddPortion(i.second, externalContext);
+        }
+    }
+}
+
+}
diff --git a/ydb/core/tx/columnshard/engines/storage/actualizer/scheme/scheme.h
b/ydb/core/tx/columnshard/engines/storage/actualizer/scheme/scheme.h new file mode 100644 index 000000000000..f67335d1f553 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/storage/actualizer/scheme/scheme.h @@ -0,0 +1,73 @@ +#pragma once +#include "counters.h" +#include +#include +#include + +namespace NKikimr::NOlap::NActualizer { + +class TSchemeActualizer: public IActualizer { +private: + const TSchemeCounters Counters; + THashMap> PortionsToActualizeScheme; + std::shared_ptr TargetSchema; + const ui64 PathId; + const TVersionedIndex& VersionedIndex; + + class TFindActualizationInfo { + private: + TRWAddress RWAddress; + public: + const TRWAddress& GetRWAddress() const { + return RWAddress; + } + + TFindActualizationInfo(TRWAddress&& rwAddress) + : RWAddress(std::move(rwAddress)) { + + } + }; + + THashMap PortionsInfo; + + class TFullActualizationInfo { + private: + TRWAddress Address; + YDB_ACCESSOR_DEF(std::shared_ptr, TargetScheme); + public: + TFindActualizationInfo ExtractFindId() { + return TFindActualizationInfo(std::move(Address)); + } + + TString DebugString() const { + return TStringBuilder() << "{address=" << Address.DebugString() << ";target_scheme=" << TargetScheme->DebugString() << "}"; + } + + const TRWAddress& GetAddress() const { + return Address; + } + + TFullActualizationInfo(TRWAddress&& address, const std::shared_ptr& targetScheme) + : Address(std::move(address)) + , TargetScheme(targetScheme) { + + } + }; + + std::optional BuildActualizationInfo(const TPortionInfo& portion) const; + +protected: + virtual void DoAddPortion(const TPortionInfo& info, const TAddExternalContext& context) override; + virtual void DoRemovePortion(const ui64 portionId) override; + virtual void DoExtractTasks(TTieringProcessContext& tasksContext, const TExternalTasksContext& externalContext, TInternalTasksContext& internalContext) override; +public: + void Refresh(const TAddExternalContext& externalContext); + + TSchemeActualizer(const ui64 pathId, const 
TVersionedIndex& versionedIndex) + : PathId(pathId) + , VersionedIndex(versionedIndex) { + Y_UNUSED(PathId); + } +}; + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/storage/actualizer/scheme/ya.make b/ydb/core/tx/columnshard/engines/storage/actualizer/scheme/ya.make new file mode 100644 index 000000000000..39bde6f3d885 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/storage/actualizer/scheme/ya.make @@ -0,0 +1,12 @@ +LIBRARY() + +SRCS( + scheme.cpp + counters.cpp +) + +PEERDIR( + ydb/core/tx/columnshard/engines/scheme/versions +) + +END() diff --git a/ydb/core/tx/columnshard/engines/storage/actualizer/tiering/counters.cpp b/ydb/core/tx/columnshard/engines/storage/actualizer/tiering/counters.cpp new file mode 100644 index 000000000000..146a8700def1 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/storage/actualizer/tiering/counters.cpp @@ -0,0 +1,5 @@ +#include "counters.h" + +namespace NKikimr::NOlap::NStorageOptimizer::NBuckets { + +} diff --git a/ydb/core/tx/columnshard/engines/storage/actualizer/tiering/counters.h b/ydb/core/tx/columnshard/engines/storage/actualizer/tiering/counters.h new file mode 100644 index 000000000000..7d7a1cc3d830 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/storage/actualizer/tiering/counters.h @@ -0,0 +1,78 @@ +#pragma once +#include +#include +#include +#include +#include + +namespace NKikimr::NOlap::NActualizer { + +class TTieringGlobalCounters: public NColumnShard::TCommonCountersOwner { +private: + using TBase = NColumnShard::TCommonCountersOwner; + + std::shared_ptr QueueSizeToEvict; + std::shared_ptr QueueSizeToDelete; + std::shared_ptr DifferenceWaitToEvict; + std::shared_ptr DifferenceWaitToDelete; + NMonitoring::TDynamicCounters::TCounterPtr SkipEvictionForCompaction; + NMonitoring::TDynamicCounters::TCounterPtr SkipEvictionForLimit; +public: + TTieringGlobalCounters() + : TBase("TieringActualizer") + { + QueueSizeToEvict = 
TBase::GetValueAutoAggregations("Granule/Eviction/QueueSize"); + QueueSizeToDelete = TBase::GetValueAutoAggregations("Granule/Deletion/QueueSize"); + DifferenceWaitToEvict = TBase::GetValueAutoAggregations("Granule/Eviction/WaitingInSeconds"); + DifferenceWaitToDelete = TBase::GetValueAutoAggregations("Granule/Deletion/WaitingInSeconds"); + SkipEvictionForCompaction = TBase::GetDeriviative("Eviction/SkipForCompaction"); + SkipEvictionForLimit = TBase::GetDeriviative("Eviction/SkipForLimit"); + } + + static NMonitoring::TDynamicCounters::TCounterPtr GetSkipEvictionForLimit() { + return Singleton()->SkipEvictionForLimit; + } + + static NMonitoring::TDynamicCounters::TCounterPtr GetSkipEvictionForCompaction() { + return Singleton()->SkipEvictionForCompaction; + } + + static std::shared_ptr BuildQueueSizeToEvict() { + return Singleton()->QueueSizeToEvict->GetClient(); + } + + static std::shared_ptr BuildQueueSizeToDelete() { + return Singleton()->QueueSizeToDelete->GetClient(); + } + + static std::shared_ptr BuildDifferenceWaitToEvict() { + return Singleton()->DifferenceWaitToEvict->GetClient(); + } + + static std::shared_ptr BuildDifferenceWaitToDelete() { + return Singleton()->DifferenceWaitToDelete->GetClient(); + } + +}; + +class TTieringCounters { +public: + const std::shared_ptr QueueSizeToEvict; + const std::shared_ptr QueueSizeToDelete; + const std::shared_ptr DifferenceWaitToEvict; + const std::shared_ptr DifferenceWaitToDelete; + const NMonitoring::TDynamicCounters::TCounterPtr SkipEvictionForCompaction; + const NMonitoring::TDynamicCounters::TCounterPtr SkipEvictionForLimit; + + TTieringCounters() + : QueueSizeToEvict(TTieringGlobalCounters::BuildQueueSizeToEvict()) + , QueueSizeToDelete(TTieringGlobalCounters::BuildQueueSizeToDelete()) + , DifferenceWaitToEvict(TTieringGlobalCounters::BuildDifferenceWaitToEvict()) + , DifferenceWaitToDelete(TTieringGlobalCounters::BuildDifferenceWaitToDelete()) + , 
SkipEvictionForCompaction(TTieringGlobalCounters::GetSkipEvictionForCompaction()) + , SkipEvictionForLimit(TTieringGlobalCounters::GetSkipEvictionForLimit()) { + } + +}; + +} diff --git a/ydb/core/tx/columnshard/engines/storage/actualizer/tiering/tiering.cpp b/ydb/core/tx/columnshard/engines/storage/actualizer/tiering/tiering.cpp new file mode 100644 index 000000000000..ffd7bb2274a1 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/storage/actualizer/tiering/tiering.cpp @@ -0,0 +1,184 @@ +#include "tiering.h" +#include +#include +#include +#include +#include +#include + +namespace NKikimr::NOlap::NActualizer { + +std::shared_ptr TTieringActualizer::GetTargetSchema(const std::shared_ptr& portionSchema) const { + if (!TargetCriticalSchema) { + return portionSchema; + } + if (portionSchema->GetVersion() < TargetCriticalSchema->GetVersion()) { + return TargetCriticalSchema; + } + return portionSchema; +} + +std::optional TTieringActualizer::BuildActualizationInfo(const TPortionInfo& portion, const TInstant now) const { + std::shared_ptr portionSchema = portion.GetSchema(VersionedIndex); + std::shared_ptr targetSchema = GetTargetSchema(portionSchema); + const TString& currentTierName = portion.GetTierNameDef(IStoragesManager::DefaultStorageId); + + if (Tiering) { + AFL_VERIFY(TieringColumnId); + auto statOperator = portionSchema->GetIndexInfo().GetStatistics(NStatistics::TIdentifier(NStatistics::EType::Max, {*TieringColumnId})); + std::shared_ptr max; + if (!statOperator) { + max = portion.MaxValue(*TieringColumnId); + if (!max) { + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "scalar_less_not_max"); + return {}; + } + } else { + NYDBTest::TControllers::GetColumnShardController()->OnStatisticsUsage(statOperator); + max = statOperator.GetScalarVerified(portion.GetMeta().GetStatisticsStorage()); + } + auto tieringInfo = Tiering->GetTierToMove(max, now); + AFL_TRACE(NKikimrServices::TX_COLUMNSHARD)("tiering_info", tieringInfo.DebugString()); + std::optional d; + 
std::set storagesWrite; + TString targetTierName; + if (portion.GetTierNameDef(IStoragesManager::DefaultStorageId) != tieringInfo.GetCurrentTierName()) { + d = -1 * tieringInfo.GetCurrentTierLag().GetValue(); + targetTierName = tieringInfo.GetCurrentTierName(); + } else if (tieringInfo.GetNextTierName()) { + d = tieringInfo.GetNextTierWaitingVerified().GetValue(); + targetTierName = tieringInfo.GetNextTierNameVerified(); + } + if (d) { + // if (currentTierName == "deploy_logs_s3" && targetTierName == IStoragesManager::DefaultStorageId) { + // AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("tiering_info", tieringInfo.DebugString())("max", max->ToString())("now", now.ToString())("d", *d)("tiering", Tiering->GetDebugString())("pathId", PathId); + // AFL_VERIFY(false)("tiering_info", tieringInfo.DebugString())("max", max->ToString())("now", now.ToString())("d", *d)("tiering", Tiering->GetDebugString())("pathId", PathId); + // } + auto storagesWrite = targetSchema->GetIndexInfo().GetUsedStorageIds(targetTierName); + auto storagesRead = portionSchema->GetIndexInfo().GetUsedStorageIds(currentTierName); + return TFullActualizationInfo(TRWAddress(std::move(storagesRead), std::move(storagesWrite)), targetTierName, *d, targetSchema); + } + } else if (currentTierName != IStoragesManager::DefaultStorageId) { + // if (currentTierName == "deploy_logs_s3") { + // AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("pathId", PathId); + // AFL_VERIFY(false)("pathId", PathId); + // } + auto storagesWrite = targetSchema->GetIndexInfo().GetUsedStorageIds(IStoragesManager::DefaultStorageId); + auto storagesRead = portionSchema->GetIndexInfo().GetUsedStorageIds(currentTierName); + TRWAddress address(std::move(storagesRead), std::move(storagesWrite)); + return TFullActualizationInfo(std::move(address), IStoragesManager::DefaultStorageId, 0, targetSchema); + } + return {}; +} + +void TTieringActualizer::DoAddPortion(const TPortionInfo& portion, const TAddExternalContext& addContext) { + if 
(!addContext.GetPortionExclusiveGuarantee()) { + if (PortionsInfo.contains(portion.GetPortionId())) { + return; + } + } else { + AFL_VERIFY(!PortionsInfo.contains(portion.GetPortionId())); + } + auto info = BuildActualizationInfo(portion, addContext.GetNow()); + if (!info) { + return; + } + AFL_VERIFY(PortionIdByWaitDuration[info->GetAddress()].AddPortion(*info, portion.GetPortionId(), addContext.GetNow() - StartInstant)); + auto address = info->GetAddress(); + TFindActualizationInfo findId(std::move(address), info->GetWaitDuration() + (addContext.GetNow() - StartInstant)); + AFL_VERIFY(PortionsInfo.emplace(portion.GetPortionId(), std::move(findId)).second); +} + +void TTieringActualizer::DoRemovePortion(const ui64 portionId) { + auto it = PortionsInfo.find(portionId); + if (it == PortionsInfo.end()) { + return; + } + auto itAddress = PortionIdByWaitDuration.find(it->second.GetRWAddress()); + AFL_VERIFY(itAddress != PortionIdByWaitDuration.end()); + if (itAddress->second.RemovePortion(it->second, portionId)) { + PortionIdByWaitDuration.erase(itAddress); + } + PortionsInfo.erase(it); +} + +void TTieringActualizer::DoExtractTasks(TTieringProcessContext& tasksContext, const TExternalTasksContext& externalContext, TInternalTasksContext& /*internalContext*/) { + THashSet portionIds; + for (auto&& [address, addressPortions] : PortionIdByWaitDuration) { + if (!tasksContext.IsRWAddressAvailable(address)) { + Counters.SkipEvictionForLimit->Add(1); + continue; + } + for (auto&& [duration, portions] : addressPortions.GetPortions()) { + if (duration - (tasksContext.Now - StartInstant) > TDuration::Zero()) { + break; + } + bool limitEnriched = false; + for (auto&& p : portions) { + auto portion = externalContext.GetPortionVerified(p); + if (!address.WriteIs(NBlobOperations::TGlobal::DefaultStorageId) && !address.WriteIs(NTiering::NCommon::DeleteTierName)) { + if (!portion->HasRuntimeFeature(TPortionInfo::ERuntimeFeature::Optimized)) { + 
Counters.SkipEvictionForCompaction->Add(1); + continue; + } + } + auto info = BuildActualizationInfo(*portion, tasksContext.Now); + AFL_VERIFY(info); + auto portionScheme = portion->GetSchema(VersionedIndex); + TPortionEvictionFeatures features(portionScheme, info->GetTargetScheme(), portion->GetTierNameDef(IStoragesManager::DefaultStorageId)); + features.SetTargetTierName(info->GetTargetTierName()); + + if (!tasksContext.AddPortion(*portion, std::move(features), info->GetLateness())) { + limitEnriched = true; + break; + } else { + portionIds.emplace(portion->GetPortionId()); + } + } + if (limitEnriched) { + break; + } + } + } + for (auto&& i : portionIds) { + RemovePortion(i); + } + + ui64 waitDurationEvict = 0; + ui64 waitQueueEvict = 0; + ui64 waitDurationDelete = 0; + ui64 waitQueueDelete = 0; + for (auto&& i : PortionIdByWaitDuration) { + std::shared_ptr waitDurationSignal; + std::shared_ptr queueSizeSignal; + if (i.first.WriteIs(NTiering::NCommon::DeleteTierName)) { + i.second.CorrectSignals(waitQueueDelete, waitDurationDelete, tasksContext.Now - StartInstant); + } else { + i.second.CorrectSignals(waitQueueEvict, waitDurationEvict, tasksContext.Now - StartInstant); + } + } + Counters.DifferenceWaitToDelete->SetValue(waitDurationDelete); + Counters.DifferenceWaitToEvict->SetValue(waitDurationEvict); + Counters.QueueSizeToDelete->SetValue(waitQueueDelete); + Counters.QueueSizeToEvict->SetValue(waitQueueEvict); + +} + +void TTieringActualizer::Refresh(const std::optional& info, const TAddExternalContext& externalContext) { + StartInstant = externalContext.GetNow(); + Tiering = info; + if (Tiering) { + TieringColumnId = VersionedIndex.GetLastSchema()->GetColumnId(Tiering->GetTtlColumn()); + } else { + TieringColumnId = {}; + } + TargetCriticalSchema = VersionedIndex.GetLastCriticalSchema(); + PortionsInfo.clear(); + PortionIdByWaitDuration.clear(); + + for (auto&& i : externalContext.GetPortions()) { + AddPortion(i.second, externalContext); + } +} + +} diff --git 
a/ydb/core/tx/columnshard/engines/storage/actualizer/tiering/tiering.h b/ydb/core/tx/columnshard/engines/storage/actualizer/tiering/tiering.h new file mode 100644 index 000000000000..96c8a13c00ce --- /dev/null +++ b/ydb/core/tx/columnshard/engines/storage/actualizer/tiering/tiering.h @@ -0,0 +1,142 @@ +#pragma once +#include "counters.h" +#include +#include +#include +#include + +namespace NKikimr::NOlap { +class TTiering; +} + +namespace NKikimr::NOlap::NActualizer { + +class TTieringActualizer: public IActualizer { +private: + TTieringCounters Counters; + class TFullActualizationInfo { + private: + TRWAddress Address; + YDB_ACCESSOR_DEF(TString, TargetTierName); + YDB_ACCESSOR_DEF(ISnapshotSchema::TPtr, TargetScheme); + i64 WaitDurationValue; + public: + TString DebugString() const { + return TStringBuilder() << "{address=" << Address.DebugString() << ";target_tier=" << TargetTierName << ";wait_duration=" << TDuration::FromValue(WaitDurationValue) << "}"; + } + + const TRWAddress& GetAddress() const { + return Address; + } + + TFullActualizationInfo(TRWAddress&& address, const TString& targetTierName, const i64 waitDurationValue, const ISnapshotSchema::TPtr& targetScheme) + : Address(std::move(address)) + , TargetTierName(targetTierName) + , TargetScheme(targetScheme) + , WaitDurationValue(waitDurationValue) + { + + } + + TDuration GetWaitDuration() const { + if (WaitDurationValue >= 0) { + return TDuration::FromValue(WaitDurationValue); + } else { + return TDuration::Zero(); + } + } + + TDuration GetLateness() const { + if (WaitDurationValue >= 0) { + return TDuration::Zero(); + } else { + return TDuration::FromValue(-WaitDurationValue); + } + } + }; + + class TFindActualizationInfo { + private: + TRWAddress RWAddress; + YDB_READONLY_DEF(TDuration, WaitDuration); + public: + const TRWAddress& GetRWAddress() const { + return RWAddress; + } + + TFindActualizationInfo(TRWAddress&& rwAddress, const TDuration waitDuration) + : RWAddress(std::move(rwAddress)) + , 
WaitDuration(waitDuration) { + + } + }; + + class TRWAddressPortionsInfo { + private: + std::map> Portions; + public: + const std::map>& GetPortions() const { + return Portions; + } + + void CorrectSignals(ui64& queueSize, ui64& waitSeconds, const TDuration dCorrect) const { + if (Portions.empty()) { + return; + } + for (auto&& i : Portions) { + if (i.first > dCorrect) { + break; + } + queueSize += i.second.size(); + } + if (Portions.begin()->first < dCorrect) { + waitSeconds = std::max(waitSeconds, (dCorrect - Portions.begin()->first).Seconds()); + } + } + + [[nodiscard]] bool AddPortion(const TFullActualizationInfo& info, const ui64 portionId, const TDuration dCorrection) { + return Portions[info.GetWaitDuration() + dCorrection].emplace(portionId).second; + } + + bool RemovePortion(const TFindActualizationInfo& info, const ui64 portionId) { + auto itDuration = Portions.find(info.GetWaitDuration()); + AFL_VERIFY(itDuration != Portions.end()); + AFL_VERIFY(itDuration->second.erase(portionId)); + if (itDuration->second.empty()) { + Portions.erase(itDuration); + } + return Portions.empty(); + } + }; + + std::optional Tiering; + std::optional TieringColumnId; + + std::shared_ptr TargetCriticalSchema; + const ui64 PathId; + const TVersionedIndex& VersionedIndex; + + TInstant StartInstant = TInstant::Zero(); + THashMap PortionIdByWaitDuration; + THashMap PortionsInfo; + + std::shared_ptr GetTargetSchema(const std::shared_ptr& portionSchema) const; + + std::optional BuildActualizationInfo(const TPortionInfo& portion, const TInstant now) const; + + virtual void DoAddPortion(const TPortionInfo& portion, const TAddExternalContext& addContext) override; + virtual void DoRemovePortion(const ui64 portionId) override; + virtual void DoExtractTasks(TTieringProcessContext& tasksContext, const TExternalTasksContext& externalContext, TInternalTasksContext& internalContext) override; + +public: + void Refresh(const std::optional& info, const TAddExternalContext& externalContext); + 
+ TTieringActualizer(const ui64 pathId, const TVersionedIndex& versionedIndex) + : PathId(pathId) + , VersionedIndex(versionedIndex) + { + Y_UNUSED(PathId); + } +}; + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/storage/actualizer/tiering/ya.make b/ydb/core/tx/columnshard/engines/storage/actualizer/tiering/ya.make new file mode 100644 index 000000000000..cea28ce6b79c --- /dev/null +++ b/ydb/core/tx/columnshard/engines/storage/actualizer/tiering/ya.make @@ -0,0 +1,12 @@ +LIBRARY() + +SRCS( + tiering.cpp + counters.cpp +) + +PEERDIR( + ydb/core/tx/columnshard/engines/scheme/versions +) + +END() diff --git a/ydb/core/tx/columnshard/engines/storage/actualizer/ya.make b/ydb/core/tx/columnshard/engines/storage/actualizer/ya.make new file mode 100644 index 000000000000..0689e11c04e6 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/storage/actualizer/ya.make @@ -0,0 +1,12 @@ +LIBRARY() + +PEERDIR( + ydb/core/tx/columnshard/engines/storage/actualizer/index + ydb/core/tx/columnshard/engines/storage/actualizer/common + ydb/core/tx/columnshard/engines/storage/actualizer/abstract + ydb/core/tx/columnshard/engines/storage/actualizer/scheme + ydb/core/tx/columnshard/engines/storage/actualizer/tiering + ydb/core/tx/columnshard/engines/storage/actualizer/counters +) + +END() diff --git a/ydb/core/tx/columnshard/engines/storage/chunks/column.cpp b/ydb/core/tx/columnshard/engines/storage/chunks/column.cpp new file mode 100644 index 000000000000..485802b0e3c1 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/storage/chunks/column.cpp @@ -0,0 +1,18 @@ +#include "column.h" +#include + +namespace NKikimr::NOlap::NChunks { + +std::vector> TChunkPreparation::DoInternalSplitImpl(const TColumnSaver& saver, const std::shared_ptr& counters, const std::vector& splitSizes) const { + auto rb = NArrow::TStatusValidator::GetValid(ColumnInfo.GetLoader()->Apply(Data)); + + auto chunks = TSimpleSplitter(saver, counters).SplitBySizes(rb, Data, splitSizes); + 
std::vector> newChunks; + for (auto&& i : chunks) { + Y_ABORT_UNLESS(i.GetSlicedBatch()->num_columns() == 1); + newChunks.emplace_back(std::make_shared(saver.Apply(i.GetSlicedBatch()), i.GetSlicedBatch()->column(0), TChunkAddress(GetColumnId(), GetChunkIdxOptional().value_or(0)), ColumnInfo)); + } + return newChunks; +} + +} diff --git a/ydb/core/tx/columnshard/engines/storage/chunks/column.h b/ydb/core/tx/columnshard/engines/storage/chunks/column.h new file mode 100644 index 000000000000..7e05b45a9638 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/storage/chunks/column.h @@ -0,0 +1,68 @@ +#pragma once +#include +#include +#include +#include + +namespace NKikimr::NOlap::NChunks { + +class TChunkPreparation: public IPortionColumnChunk { +private: + using TBase = IPortionColumnChunk; + TString Data; + TColumnRecord Record; + TSimpleColumnInfo ColumnInfo; + std::shared_ptr First; + std::shared_ptr Last; +protected: + virtual std::vector> DoInternalSplitImpl(const TColumnSaver& saver, const std::shared_ptr& counters, const std::vector& splitSizes) const override; + virtual const TString& DoGetData() const override { + return Data; + } + virtual ui32 DoGetRecordsCountImpl() const override { + return Record.GetMeta().GetNumRows(); + } + virtual TString DoDebugString() const override { + return ""; + } + virtual TSimpleChunkMeta DoBuildSimpleChunkMeta() const override { + return Record.GetMeta(); + } + virtual std::shared_ptr DoGetFirstScalar() const override { + return First; + } + virtual std::shared_ptr DoGetLastScalar() const override { + return Last; + } + virtual std::shared_ptr DoCopyWithAnotherBlob(TString&& data, const TSimpleColumnInfo& columnInfo) const override { + TColumnRecord cRecord = Record; + cRecord.ResetBlobRange(); + return std::make_shared(std::move(data), cRecord, columnInfo); + } + +public: + const TColumnRecord& GetRecord() const { + return Record; + } + + TChunkPreparation(const TString& data, const TColumnRecord& columnChunk, const 
TSimpleColumnInfo& columnInfo) + : TBase(columnChunk.ColumnId) + , Data(data) + , Record(columnChunk) + , ColumnInfo(columnInfo) { + AFL_VERIFY(Data.size() == Record.BlobRange.Size || Record.BlobRange.Size == 0)("data", Data.size())("record", Record.BlobRange.Size); + } + + TChunkPreparation(const TString& data, const std::shared_ptr& column, const TChunkAddress& address, const TSimpleColumnInfo& columnInfo) + : TBase(address.GetColumnId()) + , Data(data) + , Record(address, column, columnInfo) + , ColumnInfo(columnInfo) { + Y_ABORT_UNLESS(column->length()); + First = NArrow::TStatusValidator::GetValid(column->GetScalar(0)); + Last = NArrow::TStatusValidator::GetValid(column->GetScalar(column->length() - 1)); + Record.BlobRange.Size = data.size(); + } +}; + +} diff --git a/ydb/core/tx/columnshard/engines/storage/chunks/data.cpp b/ydb/core/tx/columnshard/engines/storage/chunks/data.cpp new file mode 100644 index 000000000000..373c88a765fa --- /dev/null +++ b/ydb/core/tx/columnshard/engines/storage/chunks/data.cpp @@ -0,0 +1,11 @@ +#include "data.h" +#include + +namespace NKikimr::NOlap::NChunks { + +void TPortionIndexChunk::DoAddIntoPortionBeforeBlob(const TBlobRangeLink16& bRange, TPortionInfo& portionInfo) const { + AFL_VERIFY(!bRange.IsValid()); + portionInfo.AddIndex(TIndexChunk(GetEntityId(), GetChunkIdxVerified(), RecordsCount, RawBytes, bRange)); +} + +} // namespace NKikimr::NOlap::NIndexes \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/storage/chunks/data.h b/ydb/core/tx/columnshard/engines/storage/chunks/data.h new file mode 100644 index 000000000000..dfa9189e560b --- /dev/null +++ b/ydb/core/tx/columnshard/engines/storage/chunks/data.h @@ -0,0 +1,48 @@ +#pragma once +#include + +namespace NKikimr::NOlap::NChunks { + +class TPortionIndexChunk: public IPortionDataChunk { +private: + using TBase = IPortionDataChunk; + const ui32 RecordsCount; + const ui64 RawBytes; + const TString Data; +protected: + virtual const TString& 
DoGetData() const override { + return Data; + } + virtual TString DoDebugString() const override { + return ""; + } + virtual std::vector> DoInternalSplit(const TColumnSaver& /*saver*/, const std::shared_ptr& /*counters*/, const std::vector& /*splitSizes*/) const override { + return {}; + } + virtual bool DoIsSplittable() const override { + return false; + } + virtual std::optional DoGetRecordsCount() const override { + return RecordsCount; + } + virtual std::shared_ptr DoGetFirstScalar() const override { + return nullptr; + } + virtual std::shared_ptr DoGetLastScalar() const override { + return nullptr; + } + virtual void DoAddIntoPortionBeforeBlob(const TBlobRangeLink16& bRange, TPortionInfo& portionInfo) const override; + virtual std::shared_ptr DoCopyWithAnotherBlob(TString&& data, const TSimpleColumnInfo& /*columnInfo*/) const override { + return std::make_shared(GetChunkAddressVerified(), RecordsCount, RawBytes, std::move(data)); + } +public: + TPortionIndexChunk(const TChunkAddress& address, const ui32 recordsCount, const ui64 rawBytes, const TString& data) + : TBase(address.GetColumnId(), address.GetChunkIdx()) + , RecordsCount(recordsCount) + , RawBytes(rawBytes) + , Data(data) + { + } + +}; +} // namespace NKikimr::NOlap::NChunks \ No newline at end of file diff --git a/ydb/core/tx/columnshard/engines/storage/chunks/null_column.cpp b/ydb/core/tx/columnshard/engines/storage/chunks/null_column.cpp new file mode 100644 index 000000000000..9aa56e56eda3 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/storage/chunks/null_column.cpp @@ -0,0 +1,5 @@ +#include "null_column.h" + +namespace NKikimr::NOlap::NChunks { + +} diff --git a/ydb/core/tx/columnshard/engines/storage/chunks/null_column.h b/ydb/core/tx/columnshard/engines/storage/chunks/null_column.h new file mode 100644 index 000000000000..3b5f078fcd08 --- /dev/null +++ b/ydb/core/tx/columnshard/engines/storage/chunks/null_column.h @@ -0,0 +1,51 @@ +#pragma once +#include +#include +#include +#include + 
+namespace NKikimr::NOlap::NChunks { + +class TNullChunkPreparation: public IPortionColumnChunk { +private: + using TBase = IPortionColumnChunk; + const ui32 RecordsCount; + TString Data; +protected: + virtual std::vector> DoInternalSplitImpl(const TColumnSaver& /*saver*/, const std::shared_ptr& /*counters*/, + const std::vector& /*splitSizes*/) const override { + AFL_VERIFY(false); + return {}; + } + virtual const TString& DoGetData() const override { + return Data; + } + virtual ui32 DoGetRecordsCountImpl() const override { + return RecordsCount; + } + virtual TString DoDebugString() const override { + return TStringBuilder() << "rc=" << RecordsCount << ";data_size=" << Data.size() << ";"; + } + virtual TSimpleChunkMeta DoBuildSimpleChunkMeta() const override { + AFL_VERIFY(false); + return TSimpleChunkMeta(nullptr, false, false); + } + virtual std::shared_ptr DoGetFirstScalar() const override { + return nullptr; + } + virtual std::shared_ptr DoGetLastScalar() const override { + return nullptr; + } + +public: + TNullChunkPreparation(const ui32 columnId, const ui32 recordsCount, const std::shared_ptr& f, const TColumnSaver& saver) + : TBase(columnId) + , RecordsCount(recordsCount) + , Data(saver.Apply(NArrow::TThreadSimpleArraysCache::GetNull(f->type(), recordsCount), f)) + { + Y_ABORT_UNLESS(RecordsCount); + SetChunkIdx(0); + } +}; + +} diff --git a/ydb/core/tx/columnshard/engines/storage/chunks/ya.make b/ydb/core/tx/columnshard/engines/storage/chunks/ya.make new file mode 100644 index 000000000000..d61554bd6f0c --- /dev/null +++ b/ydb/core/tx/columnshard/engines/storage/chunks/ya.make @@ -0,0 +1,17 @@ +LIBRARY() + +SRCS( + data.cpp + column.cpp + null_column.cpp +) + +PEERDIR( + ydb/core/tx/columnshard/splitter/abstract + ydb/core/tx/columnshard/splitter + ydb/core/tx/columnshard/engines/scheme/versions + ydb/core/tx/columnshard/engines/portions + ydb/core/tx/columnshard/counters +) + +END() diff --git a/ydb/core/tx/columnshard/engines/storage/granule.cpp 
b/ydb/core/tx/columnshard/engines/storage/granule.cpp index aac2d2bdd1b7..54e7668a1419 100644 --- a/ydb/core/tx/columnshard/engines/storage/granule.cpp +++ b/ydb/core/tx/columnshard/engines/storage/granule.cpp @@ -1,18 +1,11 @@ #include "granule.h" #include "storage.h" -#include #include "optimizer/lbuckets/optimizer.h" -namespace NKikimr::NOlap { - -TGranuleAdditiveSummary::ECompactionClass TGranuleMeta::GetCompactionType(const TCompactionLimits& limits) const { - return GetAdditiveSummary().GetCompactionClass( - limits, ModificationLastTime, TMonotonic::Now()); -} +#include +#include -ui64 TGranuleMeta::Size() const { - return GetAdditiveSummary().GetGranuleSize(); -} +namespace NKikimr::NOlap { void TGranuleMeta::UpsertPortion(const TPortionInfo& info) { AFL_TRACE(NKikimrServices::TX_COLUMNSHARD)("event", "upsert_portion")("portion", info.DebugString())("path_id", GetPathId()); @@ -50,21 +43,11 @@ bool TGranuleMeta::ErasePortion(const ui64 portion) { return true; } -void TGranuleMeta::AddColumnRecord(const TIndexInfo& indexInfo, const TPortionInfo& portion, const TColumnRecord& rec, const NKikimrTxColumnShard::TIndexPortionMeta* portionMeta) { - auto it = Portions.find(portion.GetPortion()); - AFL_TRACE(NKikimrServices::TX_COLUMNSHARD)("event", "add_column_record")("portion_info", portion.DebugString())("record", rec.DebugString()); - if (it == Portions.end()) { - Y_ABORT_UNLESS(portion.Records.empty()); - auto portionNew = std::make_shared(portion); - it = Portions.emplace(portion.GetPortion(), portionNew).first; - } else { - AFL_VERIFY(it->second->IsEqualWithSnapshots(portion))("self", it->second->DebugString())("item", portion.DebugString()); - } - it->second->AddRecord(indexInfo, rec, portionMeta); - - if (portionMeta) { - it->second->InitOperator(Owner->GetStoragesManager()->InitializePortionOperator(*it->second), false); - } +void TGranuleMeta::AddColumnRecordOnLoad(const TIndexInfo& indexInfo, const TPortionInfo& portion, const TColumnChunkLoadContext& 
rec, const NKikimrTxColumnShard::TIndexPortionMeta* portionMeta) { + std::shared_ptr pInfo = UpsertPortionOnLoad(portion); + TColumnRecord cRecord(pInfo->RegisterBlobId(rec.GetBlobRange().GetBlobId()), rec, indexInfo.GetColumnFeaturesVerified(rec.GetAddress().GetColumnId())); + AFL_TRACE(NKikimrServices::TX_COLUMNSHARD)("event", "AddColumnRecordOnLoad")("portion_info", portion.DebugString())("record", cRecord.DebugString()); + pInfo->AddRecord(indexInfo, cRecord, portionMeta); } void TGranuleMeta::OnAfterChangePortion(const std::shared_ptr portionAfter, NStorageOptimizer::IOptimizerPlanner::TModificationGuard* modificationGuard) { @@ -78,17 +61,20 @@ void TGranuleMeta::OnAfterChangePortion(const std::shared_ptr port } else { OptimizerPlanner->StartModificationGuard().AddPortion(portionAfter); } + NActualizer::TAddExternalContext context(HasAppData() ? AppDataVerified().TimeProvider->Now() : TInstant::Now(), Portions); + ActualizationIndex->AddPortion(portionAfter, context); } + Stats->OnAddPortion(*portionAfter); } if (!!AdditiveSummaryCache) { - auto g = AdditiveSummaryCache->StartEdit(Counters); if (portionAfter && !portionAfter->HasRemoveSnapshot()) { + auto g = AdditiveSummaryCache->StartEdit(Counters); g.AddPortion(*portionAfter); } } ModificationLastTime = TMonotonic::Now(); - Owner->UpdateGranuleInfo(*this); + Stats->UpdateGranuleInfo(*this); } void TGranuleMeta::OnBeforeChangePortion(const std::shared_ptr portionBefore) { @@ -107,11 +93,13 @@ void TGranuleMeta::OnBeforeChangePortion(const std::shared_ptr por PortionInfoGuard.OnDropPortion(portionBefore); if (!portionBefore->HasRemoveSnapshot()) { OptimizerPlanner->StartModificationGuard().RemovePortion(portionBefore); + ActualizationIndex->RemovePortion(portionBefore); } + Stats->OnRemovePortion(*portionBefore); } if (!!AdditiveSummaryCache) { - auto g = AdditiveSummaryCache->StartEdit(Counters); if (portionBefore && !portionBefore->HasRemoveSnapshot()) { + auto g = 
AdditiveSummaryCache->StartEdit(Counters); g.RemovePortion(*portionBefore); } } @@ -121,14 +109,14 @@ void TGranuleMeta::OnCompactionFinished() { AllowInsertionFlag = false; Y_ABORT_UNLESS(Activity.erase(EActivity::GeneralCompaction)); AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "OnCompactionFinished")("info", DebugString()); - Owner->UpdateGranuleInfo(*this); + Stats->UpdateGranuleInfo(*this); } void TGranuleMeta::OnCompactionFailed(const TString& reason) { AllowInsertionFlag = false; Y_ABORT_UNLESS(Activity.erase(EActivity::GeneralCompaction)); AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "OnCompactionFailed")("reason", reason)("info", DebugString()); - Owner->UpdateGranuleInfo(*this); + Stats->UpdateGranuleInfo(*this); } void TGranuleMeta::OnCompactionStarted() { @@ -158,14 +146,14 @@ const NKikimr::NOlap::TGranuleAdditiveSummary& TGranuleMeta::GetAdditiveSummary( return *AdditiveSummaryCache; } -TGranuleMeta::TGranuleMeta(const ui64 pathId, std::shared_ptr owner, const NColumnShard::TGranuleDataCounters& counters, const TVersionedIndex& versionedIndex) +TGranuleMeta::TGranuleMeta(const ui64 pathId, const TGranulesStorage& owner, const NColumnShard::TGranuleDataCounters& counters, const TVersionedIndex& versionedIndex) : PathId(pathId) - , Owner(owner) , Counters(counters) - , PortionInfoGuard(Owner->GetCounters().BuildPortionBlobsGuard()) + , PortionInfoGuard(owner.GetCounters().BuildPortionBlobsGuard()) + , Stats(owner.GetStats()) { - Y_ABORT_UNLESS(Owner); - OptimizerPlanner = std::make_shared(PathId, owner->GetStoragesManager(), versionedIndex.GetLastSchema()->GetIndexInfo().GetReplaceKey()); + OptimizerPlanner = std::make_shared(PathId, owner.GetStoragesManager(), versionedIndex.GetLastSchema()->GetIndexInfo().GetReplaceKey()); + ActualizationIndex = std::make_shared(PathId, versionedIndex); } @@ -173,4 +161,17 @@ bool TGranuleMeta::InCompaction() const { return Activity.contains(EActivity::GeneralCompaction); } +std::shared_ptr 
TGranuleMeta::UpsertPortionOnLoad(const TPortionInfo& portion) { + auto it = Portions.find(portion.GetPortion()); + AFL_TRACE(NKikimrServices::TX_COLUMNSHARD)("event", "UpsertPortionOnLoad")("portion_info", portion.DebugString()); + if (it == Portions.end()) { + Y_ABORT_UNLESS(portion.Records.empty()); + auto portionNew = std::make_shared(portion); + it = Portions.emplace(portion.GetPortion(), portionNew).first; + } else { + AFL_VERIFY(it->second->IsEqualWithSnapshots(portion))("self", it->second->DebugString())("item", portion.DebugString()); + } + return it->second; +} + } // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/storage/granule.h b/ydb/core/tx/columnshard/engines/storage/granule.h index b7423485094d..7113b69256a7 100644 --- a/ydb/core/tx/columnshard/engines/storage/granule.h +++ b/ydb/core/tx/columnshard/engines/storage/granule.h @@ -1,13 +1,19 @@ #pragma once -#include +#include "optimizer/abstract/optimizer.h" +#include "actualizer/index/index.h" + #include #include #include -#include "optimizer/abstract/optimizer.h" + +#include +#include namespace NKikimr::NOlap { class TGranulesStorage; +class TGranulesStat; +class TColumnChunkLoadContext; class TDataClassSummary: public NColumnShard::TBaseGranuleDataClassSummary { private: @@ -19,8 +25,9 @@ class TDataClassSummary: public NColumnShard::TBaseGranuleDataClassSummary { } void AddPortion(const TPortionInfo& info) { - const auto sizes = info.BlobsSizes(); - PortionsSize += sizes.first; + ColumnPortionsSize += info.GetColumnBlobBytes(); + TotalPortionsSize += info.GetTotalBlobBytes(); + MetadataMemoryPortionsSize += info.GetMetadataMemorySize(); RecordsCount += info.NumRows(); ++PortionsCount; @@ -35,9 +42,12 @@ class TDataClassSummary: public NColumnShard::TBaseGranuleDataClassSummary { } void RemovePortion(const TPortionInfo& info) { - const auto sizes = info.BlobsSizes(); - PortionsSize -= sizes.first; - Y_ABORT_UNLESS(PortionsSize >= 0); + MetadataMemoryPortionsSize -= 
info.GetMetadataMemorySize(); + Y_ABORT_UNLESS(MetadataMemoryPortionsSize >= 0); + ColumnPortionsSize -= info.GetColumnBlobBytes(); + Y_ABORT_UNLESS(ColumnPortionsSize >= 0); + TotalPortionsSize -= info.GetTotalBlobBytes(); + Y_ABORT_UNLESS(TotalPortionsSize >= 0); RecordsCount -= info.NumRows(); Y_ABORT_UNLESS(RecordsCount >= 0); --PortionsCount; @@ -60,48 +70,17 @@ class TGranuleAdditiveSummary { TDataClassSummary Compacted; friend class TGranuleMeta; public: - enum class ECompactionClass: ui32 { - Split = 100, - Internal = 50, - WaitInternal = 30, - NoCompaction = 0 - }; - - ECompactionClass GetCompactionClass(const TCompactionLimits& limits, const TMonotonic lastModification, const TMonotonic now) const { - if (GetActivePortionsCount() <= 1) { - return ECompactionClass::NoCompaction; - } - if ((i64)GetGranuleSize() >= limits.GranuleSizeForOverloadPrevent) - { - return ECompactionClass::Split; - } - - if (now - lastModification > TDuration::Seconds(limits.InGranuleCompactSeconds)) { - if (GetInserted().GetPortionsCount()) { - return ECompactionClass::Internal; - } - } else { - if (GetInserted().GetPortionsCount() > 1 && - (GetInserted().GetPortionsSize() >= limits.GranuleIndexedPortionsSizeLimit || - GetInserted().GetPortionsCount() >= limits.GranuleIndexedPortionsCountLimit)) { - return ECompactionClass::Internal; - } - if (GetInserted().GetPortionsCount()) { - return ECompactionClass::WaitInternal; - } - } - - return ECompactionClass::NoCompaction; - } - const TDataClassSummary& GetInserted() const { return Inserted; } const TDataClassSummary& GetCompacted() const { return Compacted; } + ui64 GetMetadataMemoryPortionsSize() const { + return Inserted.GetMetadataMemoryPortionsSize() + Compacted.GetMetadataMemoryPortionsSize(); + } ui64 GetGranuleSize() const { - return Inserted.GetPortionsSize() + Compacted.GetPortionsSize(); + return Inserted.GetTotalPortionsSize() + Compacted.GetTotalPortionsSize(); } ui64 GetActivePortionsCount() const { return 
Inserted.GetPortionsCount() + Compacted.GetPortionsCount(); @@ -165,10 +144,11 @@ class TGranuleMeta: TNonCopyable { std::set Activity; mutable bool AllowInsertionFlag = false; const ui64 PathId; - std::shared_ptr Owner; const NColumnShard::TGranuleDataCounters Counters; NColumnShard::TEngineLogsCounters::TPortionsInfoGuard PortionInfoGuard; + std::shared_ptr Stats; std::shared_ptr OptimizerPlanner; + std::shared_ptr ActualizationIndex; std::map>> PortionsByPK; void OnBeforeChangePortion(const std::shared_ptr portionBefore); @@ -176,11 +156,35 @@ class TGranuleMeta: TNonCopyable { void OnAdditiveSummaryChange() const; YDB_READONLY(TMonotonic, LastCompactionInstant, TMonotonic::Zero()); public: + void RefreshTiering(const std::optional& tiering) { + NActualizer::TAddExternalContext context(HasAppData() ? AppDataVerified().TimeProvider->Now() : TInstant::Now(), Portions); + ActualizationIndex->RefreshTiering(tiering, context); + } + + void RefreshScheme() { + NActualizer::TAddExternalContext context(HasAppData() ? AppDataVerified().TimeProvider->Now() : TInstant::Now(), Portions); + ActualizationIndex->RefreshScheme(context); + } + + void ReturnToIndexes(const THashSet& portionIds) { + NActualizer::TAddExternalContext context(HasAppData() ? 
AppDataVerified().TimeProvider->Now() : TInstant::Now(), Portions); + context.SetPortionExclusiveGuarantee(false); + for (auto&& p : portionIds) { + auto it = Portions.find(p); + AFL_VERIFY(it != Portions.end()); + ActualizationIndex->AddPortion(it->second, context); + } + } + + void StartActualizationIndex() { + ActualizationIndex->Start(); + } + NJson::TJsonValue OptimizerSerializeToJson() const { return OptimizerPlanner->SerializeToJsonVisual(); } - std::vector GetBucketPositions() const { + std::vector GetBucketPositions() const { return OptimizerPlanner->GetBucketPositions(); } @@ -188,17 +192,33 @@ class TGranuleMeta: TNonCopyable { LastCompactionInstant = TMonotonic::Now(); } - std::shared_ptr GetOptimizationTask(const TCompactionLimits& limits, std::shared_ptr self, const THashSet& busyPortions) const { - return OptimizerPlanner->GetOptimizationTask(limits, self, busyPortions); + void BuildActualizationTasks(NActualizer::TTieringProcessContext& context) const { + NActualizer::TExternalTasksContext extTasks(Portions); + ActualizationIndex->ExtractActualizationTasks(context, extTasks); + } + + std::shared_ptr GetOptimizationTask(std::shared_ptr self, const std::shared_ptr& locksManager) const { + return OptimizerPlanner->GetOptimizationTask(self, locksManager); } const std::map>>& GroupOrderedPortionsByPK() const { return PortionsByPK; } + std::map> GetPortionsOlderThenSnapshot(const TSnapshot& border) const { + std::map> result; + for (auto&& i : Portions) { + if (i.second->RecordSnapshotMin() <= border) { + result.emplace(i.first, i.second); + } + } + return result; + } + void OnAfterPortionsLoad() { auto g = OptimizerPlanner->StartModificationGuard(); for (auto&& i : Portions) { + i.second->OnAfterLoad(); OnAfterChangePortion(i.second, &g); } } @@ -214,27 +234,22 @@ class TGranuleMeta: TNonCopyable { return result; } - TGranuleAdditiveSummary::ECompactionClass GetCompactionType(const TCompactionLimits& limits) const; const TGranuleAdditiveSummary& 
GetAdditiveSummary() const; NStorageOptimizer::TOptimizationPriority GetCompactionPriority() const { return OptimizerPlanner->GetUsefulMetric(); } + bool IsLockedOptimizer(const std::shared_ptr& dataLocksManager) const { + return OptimizerPlanner->IsLocked(dataLocksManager); + } + void ActualizeOptimizer(const TInstant currentInstant) const { if (currentInstant - OptimizerPlanner->GetActualizationInstant() > TDuration::Seconds(1)) { OptimizerPlanner->Actualize(currentInstant); } } - bool NeedCompaction(const TCompactionLimits& limits) const { - if (InCompaction() || Empty()) { - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "granule_skipped_by_state")("path_id", GetPathId())("granule_size", Size()); - return false; - } - return GetCompactionType(limits) != TGranuleAdditiveSummary::ECompactionClass::NoCompaction; - } - bool InCompaction() const; bool IsErasable() const { @@ -257,12 +272,22 @@ class TGranuleMeta: TNonCopyable { ; } - void AddColumnRecord(const TIndexInfo& indexInfo, const TPortionInfo& portion, const TColumnRecord& rec, const NKikimrTxColumnShard::TIndexPortionMeta* portionMeta); + std::shared_ptr UpsertPortionOnLoad(const TPortionInfo& portion); + + void AddColumnRecordOnLoad(const TIndexInfo& indexInfo, const TPortionInfo& portion, const TColumnChunkLoadContext& rec, const NKikimrTxColumnShard::TIndexPortionMeta* portionMeta); const THashMap>& GetPortions() const { return Portions; } + std::vector> GetPortionsVector() const { + std::vector> result; + for (auto&& i : Portions) { + result.emplace_back(i.second); + } + return result; + } + ui64 GetPathId() const { return PathId; } @@ -273,7 +298,7 @@ class TGranuleMeta: TNonCopyable { return *it->second; } - std::shared_ptr GetPortionPtr(const ui64 portion) const { + std::shared_ptr GetPortionOptional(const ui64 portion) const { auto it = Portions.find(portion); if (it == Portions.end()) { return nullptr; @@ -283,11 +308,9 @@ class TGranuleMeta: TNonCopyable { bool ErasePortion(const ui64 
portion); - explicit TGranuleMeta(const ui64 pathId, std::shared_ptr owner, const NColumnShard::TGranuleDataCounters& counters, const TVersionedIndex& versionedIndex); + explicit TGranuleMeta(const ui64 pathId, const TGranulesStorage& owner, const NColumnShard::TGranuleDataCounters& counters, const TVersionedIndex& versionedIndex); bool Empty() const noexcept { return Portions.empty(); } - - ui64 Size() const; }; } // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/storage/optimizer/abstract/optimizer.cpp b/ydb/core/tx/columnshard/engines/storage/optimizer/abstract/optimizer.cpp index 4c6265d14850..6394b18b319c 100644 --- a/ydb/core/tx/columnshard/engines/storage/optimizer/abstract/optimizer.cpp +++ b/ydb/core/tx/columnshard/engines/storage/optimizer/abstract/optimizer.cpp @@ -3,16 +3,9 @@ namespace NKikimr::NOlap::NStorageOptimizer { -std::shared_ptr IOptimizerPlanner::GetOptimizationTask(const TCompactionLimits& limits, std::shared_ptr granule, const THashSet& busyPortions) const { +std::shared_ptr IOptimizerPlanner::GetOptimizationTask(std::shared_ptr granule, const std::shared_ptr& dataLocksManager) const { NActors::TLogContextGuard g(NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("path_id", PathId)); - auto result = DoGetOptimizationTask(limits, granule, busyPortions); - if (!!result) { - auto portions = result->GetTouchedPortions(); - for (auto&& i : portions) { - AFL_VERIFY(!busyPortions.contains(i))("portion_address", i.DebugString()); - } - } - return result; + return DoGetOptimizationTask(granule, dataLocksManager); } } // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/storage/optimizer/abstract/optimizer.h b/ydb/core/tx/columnshard/engines/storage/optimizer/abstract/optimizer.h index 9e9d1e222e3f..8090de1dee6d 100644 --- a/ydb/core/tx/columnshard/engines/storage/optimizer/abstract/optimizer.h +++ b/ydb/core/tx/columnshard/engines/storage/optimizer/abstract/optimizer.h @@ -1,13 +1,15 @@ 
#pragma once #include -#include #include #include +#include namespace NKikimr::NOlap { -struct TCompactionLimits; class TGranuleMeta; class TColumnEngineChanges; +namespace NDataLocks { +class TManager; +} } namespace NKikimr::NOlap::NStorageOptimizer { @@ -55,7 +57,7 @@ class IOptimizerPlanner { YDB_READONLY(TInstant, ActualizationInstant, TInstant::Zero()); protected: virtual void DoModifyPortions(const THashMap>& add, const THashMap>& remove) = 0; - virtual std::shared_ptr DoGetOptimizationTask(const TCompactionLimits& limits, std::shared_ptr granule, const THashSet& busyPortions) const = 0; + virtual std::shared_ptr DoGetOptimizationTask(std::shared_ptr granule, const std::shared_ptr& dataLocksManager) const = 0; virtual TOptimizationPriority DoGetUsefulMetric() const = 0; virtual void DoActualize(const TInstant currentInstant) = 0; virtual TString DoDebugString() const { @@ -64,7 +66,8 @@ class IOptimizerPlanner { virtual NJson::TJsonValue DoSerializeToJsonVisual() const { return NJson::JSON_NULL; } - + virtual bool DoIsLocked(const std::shared_ptr& dataLocksManager) const = 0; + public: using TFactory = NObjectFactory::TObjectFactory; IOptimizerPlanner(const ui64 pathId) @@ -115,7 +118,10 @@ class IOptimizerPlanner { return DoDebugString(); } - virtual std::vector GetBucketPositions() const = 0; + virtual std::vector GetBucketPositions() const = 0; + bool IsLocked(const std::shared_ptr& dataLocksManager) const { + return DoIsLocked(dataLocksManager); + } NJson::TJsonValue SerializeToJsonVisual() const { return DoSerializeToJsonVisual(); @@ -126,7 +132,7 @@ class IOptimizerPlanner { DoModifyPortions(add, remove); } - std::shared_ptr GetOptimizationTask(const TCompactionLimits& limits, std::shared_ptr granule, const THashSet& busyPortions) const; + std::shared_ptr GetOptimizationTask(std::shared_ptr granule, const std::shared_ptr& dataLocksManager) const; TOptimizationPriority GetUsefulMetric() const { return DoGetUsefulMetric(); } diff --git 
a/ydb/core/tx/columnshard/engines/storage/optimizer/intervals/blob_size.cpp b/ydb/core/tx/columnshard/engines/storage/optimizer/intervals/blob_size.cpp deleted file mode 100644 index da20b6420059..000000000000 --- a/ydb/core/tx/columnshard/engines/storage/optimizer/intervals/blob_size.cpp +++ /dev/null @@ -1,43 +0,0 @@ -#include "blob_size.h" -#include -#include - -namespace NKikimr::NOlap::NStorageOptimizer { - -std::shared_ptr TBlobsWithSizeLimit::BuildMergeTask(const TCompactionLimits& limits, std::shared_ptr granule, const THashSet& busyPortions) const { - if (PortionsSize > (i64)SizeLimitToMerge || PortionsCount > CountLimitToMerge) { - i64 currentSum = 0; - std::vector> portions; - std::optional tierName; - for (auto&& i : Portions) { - for (auto&& c : i.second) { - if (busyPortions.contains(c.second->GetAddress())) { - continue; - } - if (c.second->GetMeta().GetTierName() && (!tierName || *tierName < c.second->GetMeta().GetTierName())) { - tierName = c.second->GetMeta().GetTierName(); - } - currentSum += c.second->GetBlobBytes(); - portions.emplace_back(c.second); - if (currentSum > (i64)32 * 1024 * 1024) { - break; - } - } - if (currentSum > (i64)32 * 1024 * 1024) { - break; - } - } - if (currentSum > SizeLimitToMerge || PortionsCount > CountLimitToMerge) { - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "take_granule_with_small")("portions", portions.size())("current_sum", currentSum); - TSaverContext saverContext(StoragesManager->GetOperator(tierName.value_or(IStoragesManager::DefaultStorageId)), StoragesManager); - return std::make_shared(limits.GetSplitSettings(), granule, portions, saverContext); - } else { - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "take_granule_with_small")("skip", "not_enough_data"); - } - } else { - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "take_granule_with_small")("event", "skip_by_condition"); - } - return nullptr; -} - -} // namespace NKikimr::NOlap diff --git 
a/ydb/core/tx/columnshard/engines/storage/optimizer/intervals/blob_size.h b/ydb/core/tx/columnshard/engines/storage/optimizer/intervals/blob_size.h deleted file mode 100644 index df4eb4d35807..000000000000 --- a/ydb/core/tx/columnshard/engines/storage/optimizer/intervals/blob_size.h +++ /dev/null @@ -1,145 +0,0 @@ -#pragma once -#include "counters.h" -#include -#include -#include -#include -#include - -namespace NKikimr::NOlap::NStorageOptimizer { - -class TBlobsWithSizeLimit { -private: - YDB_READONLY(ui64, SizeLimit, 0); - YDB_READONLY(i64, SizeLimitToMerge, (i64)2 * 1024 * 1024); - YDB_READONLY(i64, CountLimitToMerge, 8); - YDB_READONLY(i64, PortionsSize, 0); - YDB_READONLY(i64, PortionsCount, 0); - std::map>> Portions; - std::shared_ptr Counters; - std::shared_ptr StoragesManager; -public: - TString DebugString() const { - return TStringBuilder() - << "p_count=" << PortionsCount << ";" - << "p_count_by_key=" << Portions.size() << ";" - ; - } - - TBlobsWithSizeLimit(const ui64 limit, const std::shared_ptr& counters, const std::shared_ptr& storagesManager) - : SizeLimit(limit) - , Counters(counters) - , StoragesManager(storagesManager) - { - - } - - std::shared_ptr BuildMergeTask(const TCompactionLimits& limits, std::shared_ptr granule, const THashSet& busyPortions) const; - - void AddPortion(const std::shared_ptr& portion) { - AFL_VERIFY(portion->BlobsBytes() < SizeLimit); - AFL_VERIFY(Portions[portion->IndexKeyStart()].emplace(portion->GetPortion(), portion).second); - PortionsSize += portion->BlobsBytes(); - ++PortionsCount; - Counters->OnAddSmallPortion(); - } - - void RemovePortion(const std::shared_ptr& portion) { - auto it = Portions.find(portion->IndexKeyStart()); - AFL_VERIFY(it != Portions.end()); - AFL_VERIFY(it->second.erase(portion->GetPortion())); - if (!it->second.size()) { - Portions.erase(it); - } - PortionsSize -= portion->BlobsBytes(); - AFL_VERIFY(PortionsSize >= 0); - --PortionsCount; - AFL_VERIFY(PortionsCount >= 0); - 
Counters->OnRemoveSmallPortion(); - } - - std::optional GetWeight() const { - Y_ABORT_UNLESS(Counters->GetSmallCounts() == PortionsCount); - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("portions_opt_count", PortionsCount)("counter", (ui64)Counters->SmallPortionsByGranule.get()); - if (PortionsSize > SizeLimitToMerge || PortionsCount > CountLimitToMerge) { - return TOptimizationPriority::Critical(PortionsCount); - } else { - return {}; - } - } -}; - -class TBlobsBySize { -private: - std::map BlobsBySizeLimit; -public: - TString DebugString() const { - TStringBuilder sb; - sb << "("; - for (auto&& i : BlobsBySizeLimit) { - sb << "(" << i.first << ":" << i.second.DebugString() << ");"; - } - sb << ")"; - return sb; - } - - void AddPortion(const std::shared_ptr& portion) { - auto it = BlobsBySizeLimit.upper_bound(portion->GetBlobBytes()); - if (it != BlobsBySizeLimit.end()) { - it->second.AddPortion(portion); - } - } - - void RemovePortion(const std::shared_ptr& portion) { - auto it = BlobsBySizeLimit.upper_bound(portion->GetBlobBytes()); - if (it != BlobsBySizeLimit.end()) { - it->second.RemovePortion(portion); - } - } - - std::optional GetWeight() const { - std::optional result; - for (auto&& i : BlobsBySizeLimit) { - auto w = i.second.GetWeight(); - if (!w) { - continue; - } - if (!result || *result < *w) { - result = w; - } - } - return result; - } - - const TBlobsWithSizeLimit* GetMaxWeightLimiter() const { - std::optional resultWeight; - const TBlobsWithSizeLimit* result = nullptr; - for (auto&& i : BlobsBySizeLimit) { - auto w = i.second.GetWeight(); - if (!w) { - continue; - } - if (!resultWeight || *resultWeight < *w) { - resultWeight = w; - result = &i.second; - } - } - return result; - } - - std::shared_ptr BuildMergeTask(const TCompactionLimits& limits, std::shared_ptr granule, const THashSet& busyPortions) const { - auto* limiter = GetMaxWeightLimiter(); - if (!limiter) { - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("fail", "limiter absent"); - return 
nullptr; - } - return limiter->BuildMergeTask(limits, granule, busyPortions); - } - - TBlobsBySize(const std::shared_ptr& counters, const std::shared_ptr& storagesManager) { -// BlobsBySizeLimit.emplace(512 * 1024, TBlobsWithSizeLimit(512 * 1024, counters, storagesManager)); - BlobsBySizeLimit.emplace(1024 * 1024, TBlobsWithSizeLimit(1024 * 1024, counters, storagesManager)); - } -}; - -} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/storage/optimizer/intervals/counters.cpp b/ydb/core/tx/columnshard/engines/storage/optimizer/intervals/counters.cpp deleted file mode 100644 index a8c1f6920c1a..000000000000 --- a/ydb/core/tx/columnshard/engines/storage/optimizer/intervals/counters.cpp +++ /dev/null @@ -1,5 +0,0 @@ -#include "counters.h" - -namespace NKikimr::NOlap::NStorageOptimizer { - -} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/storage/optimizer/intervals/counters.h b/ydb/core/tx/columnshard/engines/storage/optimizer/intervals/counters.h deleted file mode 100644 index e43f9acda67a..000000000000 --- a/ydb/core/tx/columnshard/engines/storage/optimizer/intervals/counters.h +++ /dev/null @@ -1,98 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include - -namespace NKikimr::NOlap::NStorageOptimizer { - -class TGlobalCounters: public NColumnShard::TCommonCountersOwner { -private: - using TBase = NColumnShard::TCommonCountersOwner; - NMonitoring::TDynamicCounters::TCounterPtr SmallPortionsCount; - std::shared_ptr HistogramOverlappedIntervalsCount; - std::shared_ptr HistogramOverlappedIntervalsPackedSizeCount; - std::shared_ptr HistogramOverlappedIntervalsRawSizeCount; - std::shared_ptr SmallPortionsCountByGranule; -public: - TGlobalCounters() - : TBase("IntervalsStorageOptimizer") { - SmallPortionsCountByGranule = TBase::GetValueAutoAggregations("Granule/SmallPortions/Count"); - SmallPortionsCount = TBase::GetValue("SmallPortions/Count"); - - const std::set borders = {0, 1, 2, 4, 8, 16, 32, 64}; - 
HistogramOverlappedIntervalsCount = std::make_shared("IntervalsStorageOptimizer", "OverlappedIntervals/Count", "", borders); - HistogramOverlappedIntervalsPackedSizeCount = std::make_shared("IntervalsStorageOptimizer", "OverlappedIntervals/Size/Packed", "", borders); - HistogramOverlappedIntervalsRawSizeCount = std::make_shared("IntervalsStorageOptimizer", "OverlappedIntervals/Size/Raw", "", borders); - } - - static std::shared_ptr BuildClientSmallPortionsAggregation() { - return Singleton()->SmallPortionsCountByGranule->GetClient(); - } - - static std::shared_ptr BuildGuardIntervalsOverlapping() { - return Singleton()->HistogramOverlappedIntervalsCount->BuildGuard(); - } - - static std::shared_ptr BuildGuardIntervalsPackedSizeOverlapping() { - return Singleton()->HistogramOverlappedIntervalsPackedSizeCount->BuildGuard(); - } - - static std::shared_ptr BuildGuardIntervalsRawSizeOverlapping() { - return Singleton()->HistogramOverlappedIntervalsRawSizeCount->BuildGuard(); - } - - static std::shared_ptr BuildSmallPortionsGuard() { - return std::make_shared(Singleton()->SmallPortionsCount); - } - -}; - -class TCounters { -private: - std::shared_ptr IntervalsGuard; - std::shared_ptr IntervalsPackedSizeGuard; - std::shared_ptr IntervalsRawSizeGuard; - std::shared_ptr SmallPortionsCount; -public: - std::shared_ptr SmallPortionsByGranule; - i64 GetSmallCounts() const { - return SmallPortionsByGranule->GetValueSimple(); - } - - TCounters() { - IntervalsGuard = TGlobalCounters::BuildGuardIntervalsOverlapping(); - IntervalsPackedSizeGuard = TGlobalCounters::BuildGuardIntervalsPackedSizeOverlapping(); - IntervalsRawSizeGuard = TGlobalCounters::BuildGuardIntervalsRawSizeOverlapping(); - SmallPortionsCount = TGlobalCounters::BuildSmallPortionsGuard(); - SmallPortionsByGranule = TGlobalCounters::BuildClientSmallPortionsAggregation(); - } - - void OnRemoveIntervalsCount(const ui32 count, const ui64 rawSize, const ui64 packedSize) { - IntervalsGuard->Sub(count, 1); - 
IntervalsPackedSizeGuard->Sub(count, packedSize); - IntervalsRawSizeGuard->Sub(count, rawSize); - } - - void OnAddIntervalsCount(const ui32 count, const ui64 rawSize, const ui64 packedSize) { - IntervalsGuard->Add(count, 1); - IntervalsPackedSizeGuard->Add(count, packedSize); - IntervalsRawSizeGuard->Add(count, rawSize); - } - - void OnAddSmallPortion() { - SmallPortionsCount->Add(1); - SmallPortionsByGranule->Add(1); - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("add_opt_count", SmallPortionsByGranule->GetValueSimple())("counter", (ui64)SmallPortionsByGranule.get()); - } - - void OnRemoveSmallPortion() { - SmallPortionsCount->Sub(1); - SmallPortionsByGranule->Remove(1); - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("remove_opt_count", SmallPortionsByGranule->GetValueSimple())("counter", (ui64)SmallPortionsByGranule.get()); - } - -}; - -} diff --git a/ydb/core/tx/columnshard/engines/storage/optimizer/intervals/optimizer.cpp b/ydb/core/tx/columnshard/engines/storage/optimizer/intervals/optimizer.cpp deleted file mode 100644 index a31ebe4eb6a6..000000000000 --- a/ydb/core/tx/columnshard/engines/storage/optimizer/intervals/optimizer.cpp +++ /dev/null @@ -1,212 +0,0 @@ -#include "optimizer.h" -#include -#include -#include -#include - -namespace NKikimr::NOlap::NStorageOptimizer { - -std::vector> TIntervalsOptimizerPlanner::GetPortionsForIntervalStartedIn(const NArrow::TReplaceKey& keyStart, const ui32 countExpectation) const { - std::vector> result; - auto it = Positions.find(keyStart); - AFL_VERIFY(it != Positions.end()); - THashSet portionsCurrentlyClosed; - auto itReverse = make_reverse_iterator(it); - AFL_VERIFY(itReverse != Positions.rbegin()); - --itReverse; - for (; itReverse != Positions.rend(); ++itReverse) { - for (auto&& i : itReverse->second.GetPositions()) { - if (i.first.GetIsStart()) { - if (!portionsCurrentlyClosed.erase(i.first.GetPortionId())) { - result.emplace_back(i.second.GetPortionPtr()); - } - } else { - 
AFL_VERIFY(portionsCurrentlyClosed.emplace(i.first.GetPortionId()).second); - } - } - if (result.size() == countExpectation) { - return result; - } - } - AFL_VERIFY(false)("result.size()", result.size())("expectation", countExpectation); - return result; -} - -std::shared_ptr TIntervalsOptimizerPlanner::DoGetOptimizationTask(const TCompactionLimits& limits, std::shared_ptr granule, const THashSet& busyPortions) const { - if (auto result = SizeProblemBlobs.BuildMergeTask(limits, granule, busyPortions)) { - return result; - } - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("skip", "no_small_portion_tasks"); - return nullptr; - if (RangedSegments.empty()) { - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "no_ranged_segments"); - return nullptr; - } - auto& topSegment = **RangedSegments.rbegin()->second.begin(); - auto& features = topSegment.GetFeatures(); - std::vector> portions = GetPortionsForIntervalStartedIn(topSegment.GetPosition(), features.GetPortionsCount()); - - if (portions.size() <= 1) { - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "take_granule_skip")("features", features.DebugJson().GetStringRobust())("reason", "one_portion"); - return nullptr; - } - - std::optional tierName; - for (auto&& i : portions) { - if (i->GetMeta().GetTierName() && (!tierName || *tierName < i->GetMeta().GetTierName())) { - tierName = i->GetMeta().GetTierName(); - } - if (busyPortions.contains(i->GetAddress())) { - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "take_granule_skip")("features", features.DebugJson().GetStringRobust()) - ("count", features.GetPortionsCount())("reason", "busy_portion")("portion_address", i->GetAddress().DebugString()); - return nullptr; - } - } - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "take_granule")("features", features.DebugJson().GetStringRobust())("count", features.GetPortionsCount()); - - TSaverContext saverContext(StoragesManager->GetOperator(tierName.value_or(IStoragesManager::DefaultStorageId)), StoragesManager); 
- return std::make_shared(limits.GetSplitSettings(), granule, portions, saverContext); -} - -void TIntervalsOptimizerPlanner::RemovePortion(const std::shared_ptr& info) { - AFL_TRACE(NKikimrServices::TX_COLUMNSHARD)("event", "remove_portion")("portion_id", info->GetPortion()); - auto itStart = Positions.find(info->IndexKeyStart()); - auto itFinish = Positions.find(info->IndexKeyEnd()); - Y_ABORT_UNLESS(itStart != Positions.end()); - Y_ABORT_UNLESS(itFinish != Positions.end()); - if (itStart == itFinish) { - RemoveRanged(itStart->second); - itStart->second.RemoveSummary(info); - AddRanged(itStart->second); - if (itStart->second.RemoveStart(info) || itStart->second.RemoveFinish(info)) { - RemoveRanged(itStart->second); - Positions.erase(itStart); - } - } else { - for (auto it = itStart; it != itFinish; ++it) { - RemoveRanged(it->second); - it->second.RemoveSummary(info); - AddRanged(it->second); - } - if (itStart->second.RemoveStart(info)) { - RemoveRanged(itStart->second); - Positions.erase(itStart); - } - if (itFinish->second.RemoveFinish(info)) { - RemoveRanged(itFinish->second); - Positions.erase(itFinish); - } - } - AFL_VERIFY(RangedSegments.empty() == Positions.empty())("rs_size", RangedSegments.size())("p_size", Positions.size()); -} - -void TIntervalsOptimizerPlanner::AddPortion(const std::shared_ptr& info) { - AFL_TRACE(NKikimrServices::TX_COLUMNSHARD)("event", "add_portion")("portion_id", info->GetPortion()); - auto itStart = Positions.find(info->IndexKeyStart()); - if (itStart == Positions.end()) { - itStart = Positions.emplace(info->IndexKeyStart(), TBorderPositions(info->IndexKeyStart())).first; - if (itStart != Positions.begin()) { - auto itStartCopy = itStart; - --itStartCopy; - itStart->second.CopyFrom(itStartCopy->second); - AddRanged(itStart->second); - } - } - auto itEnd = Positions.find(info->IndexKeyEnd()); - if (itEnd == Positions.end()) { - itEnd = Positions.emplace(info->IndexKeyEnd(), TBorderPositions(info->IndexKeyEnd())).first; - 
Y_ABORT_UNLESS(itEnd != Positions.begin()); - auto itEndCopy = itEnd; - --itEndCopy; - itEnd->second.CopyFrom(itEndCopy->second); - AddRanged(itEnd->second); - itStart = Positions.find(info->IndexKeyStart()); - } - Y_ABORT_UNLESS(itStart != Positions.end()); - Y_ABORT_UNLESS(itEnd != Positions.end()); - itStart->second.AddStart(info); - itEnd->second.AddFinish(info); - if (itStart != itEnd) { - for (auto it = itStart; it != itEnd; ++it) { - RemoveRanged(it->second); - it->second.AddSummary(info); - AFL_VERIFY(!!it->second.GetFeatures()); - AddRanged(it->second); - } - } else { - RemoveRanged(itStart->second); - itStart->second.AddSummary(info); - AddRanged(itStart->second); - } - AFL_VERIFY(RangedSegments.empty() == Positions.empty())("rs_size", RangedSegments.size())("p_size", Positions.size()); -} - -void TIntervalsOptimizerPlanner::DoModifyPortions(const THashMap>& add, const THashMap>& remove) { - for (auto&& [_, i] : remove) { - SizeProblemBlobs.RemovePortion(i); - RemovePortion(i); - } - for (auto&& [_, i] : add) { - SizeProblemBlobs.AddPortion(i); - AddPortion(i); - } -} - -void TIntervalsOptimizerPlanner::RemoveRanged(const TBorderPositions& data) { - if (!!data.GetFeatures()) { - Counters->OnRemoveIntervalsCount(data.GetFeatures().GetPortionsCount(), data.GetFeatures().GetPortionsRawWeight(), data.GetFeatures().GetPortionsWeight()); - auto itFeatures = RangedSegments.find(data.GetFeatures()); - Y_ABORT_UNLESS(itFeatures->second.erase(&data)); - if (itFeatures->second.empty()) { - RangedSegments.erase(itFeatures); - } - } -} - -void TIntervalsOptimizerPlanner::AddRanged(const TBorderPositions& data) { - if (!!data.GetFeatures()) { - Counters->OnAddIntervalsCount(data.GetFeatures().GetPortionsCount(), data.GetFeatures().GetPortionsRawWeight(), data.GetFeatures().GetPortionsWeight()); - Y_ABORT_UNLESS(RangedSegments[data.GetFeatures()].emplace(&data).second); - } -} - -TIntervalsOptimizerPlanner::TIntervalsOptimizerPlanner(const ui64 pathId, const 
std::shared_ptr& storagesManager) - : TBase(pathId) - , StoragesManager(storagesManager) - , Counters(std::make_shared()) - , SizeProblemBlobs(Counters, storagesManager) -{ -} - -TOptimizationPriority TIntervalsOptimizerPlanner::DoGetUsefulMetric() const { - auto res = SizeProblemBlobs.GetWeight(); - if (!!res) { - AFL_VERIFY(RangedSegments.size())("positions", Positions.size())("sizes", SizeProblemBlobs.DebugString()); - return *res; - } - if (RangedSegments.empty()) { - return TOptimizationPriority::Zero(); - } - auto& topSegment = **RangedSegments.rbegin()->second.begin(); - auto& topFeaturesTask = topSegment.GetFeatures(); - return TOptimizationPriority::Optimization(topFeaturesTask.GetUsefulMetric()); -} - -TString TIntervalsOptimizerPlanner::DoDebugString() const { - NJson::TJsonValue result = NJson::JSON_MAP; - auto& positions = result.InsertValue("positions", NJson::JSON_ARRAY); - for (auto&& i : Positions) { - positions.AppendValue(i.second.DebugJson()); - } - return result.GetStringRobust(); -} - -void TIntervalsOptimizerPlanner::TBorderPositions::AddSummary(const std::shared_ptr& info) { - Features.Add(info); -} - -void TIntervalsOptimizerPlanner::TBorderPositions::RemoveSummary(const std::shared_ptr& info) { - Features.Remove(info); -} - -} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/storage/optimizer/intervals/optimizer.h b/ydb/core/tx/columnshard/engines/storage/optimizer/intervals/optimizer.h deleted file mode 100644 index 80f3a5edc583..000000000000 --- a/ydb/core/tx/columnshard/engines/storage/optimizer/intervals/optimizer.h +++ /dev/null @@ -1,256 +0,0 @@ -#pragma once -#include "optimizer.h" -#include "counters.h" -#include "blob_size.h" -#include -#include -#include -#include -#include - -namespace NKikimr::NOlap::NStorageOptimizer { - -class TCounters; - -class TSegmentPosition { -private: - std::shared_ptr Portion; - const NArrow::TReplaceKey& Position; - const bool IsStartFlag; - TSegmentPosition(const 
std::shared_ptr& data, const bool start) - : Portion(data) - , Position(start ? Portion->IndexKeyStart() : Portion->IndexKeyEnd()) - , IsStartFlag(start) - { - - } -public: - - NJson::TJsonValue DebugJson() const { - NJson::TJsonValue result = NJson::JSON_MAP; - result.InsertValue("is_start", IsStartFlag); - return result; - } - - TString DebugString() const { - return TStringBuilder() << - (IsStartFlag ? "ADD" : "REMOVE") << ":" << - Position.DebugString() << ";" << - Portion->DebugString() << ";" - ; - } - - std::shared_ptr GetPortionPtr() const { - return Portion; - } - - const TPortionInfo& GetPortion() const { - return *Portion; - } - - const NArrow::TReplaceKey& GetPosition() const { - return Position; - } - - static TSegmentPosition Start(const std::shared_ptr& data) { - return TSegmentPosition(data, true); - } - - static TSegmentPosition Finish(const std::shared_ptr& data) { - return TSegmentPosition(data, false); - } - - bool operator<(const TSegmentPosition& item) const { - return Portion->GetPortion() < item.Portion->GetPortion(); - } -}; - -class TIntervalFeatures { -private: - YDB_READONLY(i32, PortionsCount, 0); - YDB_READONLY(i32, RecordsCount, 0); - YDB_READONLY(i64, PortionsWeight, 0); - YDB_READONLY(i64, PortionsRawWeight, 0); - YDB_READONLY(i64, SmallPortionsWeight, 0); - YDB_READONLY(i64, SmallPortionsCount, 0); -public: - - NJson::TJsonValue DebugJson() const { - NJson::TJsonValue result = NJson::JSON_MAP; - result.InsertValue("p_count", PortionsCount); - result.InsertValue("p_weight", PortionsWeight); - result.InsertValue("p_raw_weight", PortionsRawWeight); - result.InsertValue("sp_count", SmallPortionsCount); - result.InsertValue("sp_weight", SmallPortionsWeight); - result.InsertValue("r_count", RecordsCount); - return result; - } - - i64 GetUsefulMetric() const { - if (PortionsCount == 1 || PortionsWeight == 0) { - return 0; - } - return (i64)10000 * PortionsCount / (PortionsWeight * 1e-6); - } - - double GetUsefulKff() const { - if 
(PortionsCount == 0 || PortionsWeight == 0) { - return Max(); - } - Y_ABORT_UNLESS(PortionsWeight); - return 1.0 * GetUsefulMetric() / PortionsWeight; - } - - void Add(const std::shared_ptr& info) { - AFL_TRACE(NKikimrServices::TX_COLUMNSHARD)("event", "add_portion_in_summary")("portion_id", info->GetPortion())("count", GetPortionsCount())("this", (ui64)this); - ++PortionsCount; - const i64 portionBytes = info->BlobsBytes(); - PortionsWeight += portionBytes; - PortionsRawWeight += info->RawBytesSum(); - RecordsCount += info->NumRows(); - if ((i64)portionBytes < TSplitSettings().GetMinBlobSize()) { - ++SmallPortionsCount; - SmallPortionsWeight += portionBytes; - } - } - - void Remove(const std::shared_ptr& info) { - AFL_TRACE(NKikimrServices::TX_COLUMNSHARD)("event", "remove_portion_from_summary")("portion_id", info->GetPortion())("count", GetPortionsCount())("this", (ui64)this); - Y_ABORT_UNLESS(--PortionsCount >= 0); - const i64 portionBytes = info->BlobsBytes(); - PortionsWeight -= portionBytes; - Y_ABORT_UNLESS(PortionsWeight >= 0); - PortionsRawWeight -= info->RawBytesSum(); - Y_ABORT_UNLESS(PortionsRawWeight >= 0); - RecordsCount -= info->NumRows(); - Y_ABORT_UNLESS(RecordsCount >= 0); - if ((i64)portionBytes < TSplitSettings().GetMinBlobSize()) { - Y_ABORT_UNLESS(--SmallPortionsCount >= 0); - SmallPortionsWeight -= portionBytes; - Y_ABORT_UNLESS(SmallPortionsWeight >= 0); - } - } - - bool operator!() const { - return !PortionsCount; - } - - bool operator<(const TIntervalFeatures& item) const { - return GetUsefulMetric() < item.GetUsefulMetric(); - } - bool IsEnoughWeight() const { - return GetPortionsRawWeight() > TSplitSettings().GetMinBlobSize() * 10; - } - -}; - -class TIntervalsOptimizerPlanner: public IOptimizerPlanner { -private: - static ui64 LimitSmallBlobsMerge; - static ui64 LimitSmallBlobDetect; - std::shared_ptr StoragesManager; - - std::shared_ptr Counters; - - using TBase = IOptimizerPlanner; - - class TPortionIntervalPoint { - private: - 
YDB_READONLY(ui64, PortionId, 0); - YDB_READONLY(bool, IsStart, false); - public: - TPortionIntervalPoint(const ui64 portionId, const bool isStart) - : PortionId(portionId) - , IsStart(isStart) - { - - } - - bool operator<(const TPortionIntervalPoint& item) const { - return std::tie(PortionId, IsStart) < std::tie(item.PortionId, item.IsStart); - } - }; - - class TBorderPositions { - private: - const NArrow::TReplaceKey Position; - std::map Positions; - YDB_READONLY_DEF(TIntervalFeatures, Features); - public: - TBorderPositions(const NArrow::TReplaceKey& position) - : Position(position) - { - - } - - const std::map& GetPositions() const { - return Positions; - } - - NJson::TJsonValue DebugJson() const { - NJson::TJsonValue result = NJson::JSON_MAP; - result.InsertValue("p", Position.DebugString()); - auto& segments = result.InsertValue("segments", NJson::JSON_ARRAY); - for (auto&& i : Positions) { - segments.AppendValue(i.second.DebugJson()); - } - result.InsertValue("features", Features.DebugJson()); - return result; - } - - void CopyFrom(const TBorderPositions& source) { - Features = source.Features; - } - - const NArrow::TReplaceKey& GetPosition() const { - return Position; - } - - void AddStart(const std::shared_ptr& info) { - Y_ABORT_UNLESS(Positions.emplace(TPortionIntervalPoint(info->GetPortion(), true), TSegmentPosition::Start(info)).second); - } - void AddFinish(const std::shared_ptr& info) { - Y_ABORT_UNLESS(Positions.emplace(TPortionIntervalPoint(info->GetPortion(), false), TSegmentPosition::Finish(info)).second); - } - bool RemoveStart(const std::shared_ptr& info) { - Y_ABORT_UNLESS(Positions.erase(TPortionIntervalPoint(info->GetPortion(), true))); - return Positions.empty(); - } - bool RemoveFinish(const std::shared_ptr& info) { - Y_ABORT_UNLESS(Positions.erase(TPortionIntervalPoint(info->GetPortion(), false))); - return Positions.empty(); - } - void AddSummary(const std::shared_ptr& info); - void RemoveSummary(const std::shared_ptr& info); - }; - - 
std::map> RangedSegments; - - using TPositions = std::map; - TPositions Positions; - TBlobsBySize SizeProblemBlobs; - - void RemovePortion(const std::shared_ptr& info); - void AddPortion(const std::shared_ptr& info); - - void RemoveRanged(const TBorderPositions& data); - void AddRanged(const TBorderPositions& data); - - bool RemoveSmallPortion(const std::shared_ptr& info); - - bool AddSmallPortion(const std::shared_ptr& info); - - std::vector> GetPortionsForIntervalStartedIn(const NArrow::TReplaceKey& keyStart, const ui32 countExpectation) const; - -protected: - virtual void DoModifyPortions(const THashMap>& add, const THashMap>& remove) override; - virtual std::shared_ptr DoGetOptimizationTask(const TCompactionLimits& limits, std::shared_ptr granule, const THashSet& busyPortions) const override; - - virtual TOptimizationPriority DoGetUsefulMetric() const override; - virtual TString DoDebugString() const override; - -public: - TIntervalsOptimizerPlanner(const ui64 pathId, const std::shared_ptr& storagesManager); -}; - -} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/storage/optimizer/intervals/ya.make b/ydb/core/tx/columnshard/engines/storage/optimizer/intervals/ya.make deleted file mode 100644 index f76c42447ab7..000000000000 --- a/ydb/core/tx/columnshard/engines/storage/optimizer/intervals/ya.make +++ /dev/null @@ -1,16 +0,0 @@ -LIBRARY() - -SRCS( - optimizer.cpp - blob_size.cpp - counters.cpp -) - -PEERDIR( - contrib/libs/apache/arrow - ydb/core/protos - ydb/core/formats/arrow - ydb/core/tx/columnshard/engines/changes/abstract -) - -END() diff --git a/ydb/core/tx/columnshard/engines/storage/optimizer/lbuckets/counters.cpp b/ydb/core/tx/columnshard/engines/storage/optimizer/lbuckets/counters.cpp index 146a8700def1..f6e4cca64311 100644 --- a/ydb/core/tx/columnshard/engines/storage/optimizer/lbuckets/counters.cpp +++ b/ydb/core/tx/columnshard/engines/storage/optimizer/lbuckets/counters.cpp @@ -1,5 +1,5 @@ #include "counters.h" -namespace 
NKikimr::NOlap::NStorageOptimizer::NBuckets { +namespace NKikimr::NOlap::NActualizer { } diff --git a/ydb/core/tx/columnshard/engines/storage/optimizer/lbuckets/counters.h b/ydb/core/tx/columnshard/engines/storage/optimizer/lbuckets/counters.h index 9ab62e8f8801..08e9b51fc59b 100644 --- a/ydb/core/tx/columnshard/engines/storage/optimizer/lbuckets/counters.h +++ b/ydb/core/tx/columnshard/engines/storage/optimizer/lbuckets/counters.h @@ -39,13 +39,13 @@ class TPortionCategoryCounters { void AddPortion(const std::shared_ptr& p) { RecordsCount->Add(p->NumRows()); Count->Add(1); - Bytes->Add(p->GetBlobBytes()); + Bytes->Add(p->GetTotalBlobBytes()); } void RemovePortion(const std::shared_ptr& p) { RecordsCount->Remove(p->NumRows()); Count->Remove(1); - Bytes->Remove(p->GetBlobBytes()); + Bytes->Remove(p->GetTotalBlobBytes()); } }; diff --git a/ydb/core/tx/columnshard/engines/storage/optimizer/lbuckets/optimizer.h b/ydb/core/tx/columnshard/engines/storage/optimizer/lbuckets/optimizer.h index 9ce196f5a792..148dd5da4a26 100644 --- a/ydb/core/tx/columnshard/engines/storage/optimizer/lbuckets/optimizer.h +++ b/ydb/core/tx/columnshard/engines/storage/optimizer/lbuckets/optimizer.h @@ -3,6 +3,7 @@ #include #include +#include #include #include #include @@ -13,6 +14,7 @@ #include #include #include +#include namespace NKikimr::NOlap::NStorageOptimizer::NBuckets { @@ -42,12 +44,12 @@ class TSimplePortionsGroupInfo { } void AddPortion(const std::shared_ptr& p) { - Bytes += p->GetBlobBytes(); + Bytes += p->GetTotalBlobBytes(); Count += 1; RecordsCount += p->NumRows(); } void RemovePortion(const std::shared_ptr& p) { - Bytes -= p->GetBlobBytes(); + Bytes -= p->GetTotalBlobBytes(); Count -= 1; RecordsCount -= p->NumRows(); AFL_VERIFY(Bytes >= 0); @@ -200,6 +202,44 @@ class TPortionsPool { } public: + void InitRuntimeFeature() const { + for (auto&& f : Futures) { + for (auto&& p : f.second) { + p.second->RemoveRuntimeFeature(TPortionInfo::ERuntimeFeature::Optimized); + } + } + for 
(auto&& i : PreActuals) { + i.second->RemoveRuntimeFeature(TPortionInfo::ERuntimeFeature::Optimized); + } + for (auto&& i : Actuals) { + i.second->RemoveRuntimeFeature(TPortionInfo::ERuntimeFeature::Optimized); + } + } + + bool IsLocked(const std::shared_ptr& dataLocksManager) const { + for (auto&& f : Futures) { + for (auto&& p : f.second) { + if (auto lockInfo = dataLocksManager->IsLocked(*p.second)) { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "optimization_locked")("reason", *lockInfo); + return true; + } + } + } + for (auto&& i : PreActuals) { + if (auto lockInfo = dataLocksManager->IsLocked(*i.second)) { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "optimization_locked")("reason", *lockInfo); + return true; + } + } + for (auto&& i : Actuals) { + if (auto lockInfo = dataLocksManager->IsLocked(*i.second)) { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "optimization_locked")("reason", *lockInfo); + return true; + } + } + return false; + } + bool Validate(const std::shared_ptr& portion) const { if (portion) { AFL_VERIFY(!PreActuals.contains(portion->GetPortionId())); @@ -379,6 +419,7 @@ class TPortionsPool { } void Add(const std::shared_ptr& portion, const TInstant now) { + portion->RemoveRuntimeFeature(TPortionInfo::ERuntimeFeature::Optimized); auto portionMaxSnapshotInstant = TInstant::MilliSeconds(portion->RecordSnapshotMax().GetPlanStep()); if (now - portionMaxSnapshotInstant < FutureDetector) { AFL_VERIFY(AddFuture(portion)); @@ -393,6 +434,7 @@ class TPortionsPool { } bool Remove(const std::shared_ptr& portion) Y_WARN_UNUSED_RESULT { + portion->AddRuntimeFeature(TPortionInfo::ERuntimeFeature::Optimized); if (RemovePreActual(portion)) { return true; } @@ -515,7 +557,7 @@ class TPortionsPool { /* const ui64 count = BucketInfo.GetCount() + ((mainPortion && !isFinal) ? 1 : 0); // const ui64 recordsCount = BucketInfo.GetRecordsCount() + ((mainPortion && !isFinal) ? 
mainPortion->GetRecordsCount() : 0); - const ui64 sumBytes = BucketInfo.GetBytes() + ((mainPortion && !isFinal) ? mainPortion->GetBlobBytes() : 0); + const ui64 sumBytes = BucketInfo.GetBytes() + ((mainPortion && !isFinal) ? mainPortion->GetTotalBlobBytes() : 0); if (count <= 1) { return 0; } @@ -532,7 +574,7 @@ class TPortionsPool { /* const ui64 count = BucketInfo.GetCount() + ((mainPortion && !isFinal) ? 1 : 0); // const ui64 recordsCount = BucketInfo.GetRecordsCount() + ((mainPortion && !isFinal) ? mainPortion->GetRecordsCount() : 0); - const ui64 sumBytes = BucketInfo.GetBytes() + ((mainPortion && !isFinal) ? mainPortion->GetBlobBytes() : 0); + const ui64 sumBytes = BucketInfo.GetBytes() + ((mainPortion && !isFinal) ? mainPortion->GetTotalBlobBytes() : 0); if (count > 1 && (sumBytes > 32 * 1024 * 1024 || !isFinal || count > 100)) { return (10000000000.0 * count - sumBytes) * (isFinal ? 1 : 10); } else { @@ -542,7 +584,7 @@ class TPortionsPool { const ui64 count = BucketInfo.GetCount() + ((mainPortion && !isFinal) ? 1 : 0); const ui64 recordsCount = BucketInfo.GetRecordsCount() + ((mainPortion && !isFinal) ? mainPortion->GetRecordsCount() : 0); - const ui64 sumBytes = BucketInfo.GetBytes() + ((mainPortion && !isFinal) ? mainPortion->GetBlobBytes() : 0); + const ui64 sumBytes = BucketInfo.GetBytes() + ((mainPortion && !isFinal) ? 
mainPortion->GetTotalBlobBytes() : 0); if (NYDBTest::TControllers::GetColumnShardController()->GetCompactionControl() == NYDBTest::EOptimizerCompactionWeightControl::Disable) { return 0; } @@ -582,6 +624,7 @@ class TPortionsBucket: public TMoveOnly { private: std::shared_ptr MainPortion; const std::shared_ptr Counters; + mutable std::optional LastWeight; TPortionsPool Others; std::optional NextBorder; @@ -597,6 +640,15 @@ class TPortionsBucket: public TMoveOnly { bool Validate() const { return Others.Validate(MainPortion); } + + void RebuildOptimizedFeature(const TInstant currentInstant) const { + Others.InitRuntimeFeature(); + if (!MainPortion) { + return; + } + MainPortion->InitRuntimeFeature(TPortionInfo::ERuntimeFeature::Optimized, Others.IsEmpty() && currentInstant > MainPortion->RecordSnapshotMax().GetPlanInstant() + + NYDBTest::TControllers::GetColumnShardController()->GetLagForCompactionBeforeTierings(TDuration::Minutes(60))); + } public: class TModificationGuard: TNonCopyable { private: @@ -631,6 +683,16 @@ class TPortionsBucket: public TMoveOnly { } }; + bool IsLocked(const std::shared_ptr& dataLocksManager) const { + if (MainPortion) { + if (auto lockInfo = dataLocksManager->IsLocked(*MainPortion)) { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "optimization_locked")("reason", *lockInfo); + return true; + } + } + return Others.IsLocked(dataLocksManager); + } + bool IsEmpty() const { return !MainPortion && Others.IsEmpty(); } @@ -699,11 +761,20 @@ class TPortionsBucket: public TMoveOnly { } i64 GetWeight() const { - return Others.GetWeight(MainPortion, !NextBorder); + LastWeight = Others.GetWeight(MainPortion, !NextBorder); + return *LastWeight; + } + + i64 GetLastWeight() const { + if (LastWeight) { + return *LastWeight; + } else { + return GetWeight(); + } } - std::shared_ptr BuildOptimizationTask(const TCompactionLimits& limits, std::shared_ptr granule, - const THashSet& busyPortions, const NArrow::TReplaceKey* nextBorder, const 
std::shared_ptr& primaryKeysSchema, + std::shared_ptr BuildOptimizationTask(std::shared_ptr granule, + const std::shared_ptr& locksManager, const NArrow::TReplaceKey* nextBorder, const std::shared_ptr& primaryKeysSchema, const std::shared_ptr& storagesManager) const { auto youngestPortion = GetYoungestPortion(nextBorder); @@ -743,26 +814,26 @@ class TPortionsBucket: public TMoveOnly { AFL_VERIFY(portions.size() > 1); ui64 size = 0; for (auto&& i : portions) { - size += i->GetBlobBytes(); - if (busyPortions.contains(i->GetAddress())) { + size += i->GetTotalBlobBytes(); + if (locksManager->IsLocked(*i)) { AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("info", Others.DebugString())("event", "skip_optimization")("reason", "busy"); return nullptr; } } AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("stop_instant", stopInstant)("size", size)("next", NextBorder ? NextBorder->DebugString() : "") ("count", portions.size())("info", Others.DebugString())("event", "start_optimization")("stop_point", stopPoint ? 
stopPoint->DebugString() : ""); - TSaverContext saverContext(storagesManager->GetOperator(IStoragesManager::DefaultStorageId), storagesManager); - auto result = std::make_shared(limits.GetSplitSettings(), granule, portions, saverContext); + TSaverContext saverContext(storagesManager); + auto result = std::make_shared(granule, portions, saverContext); if (MainPortion) { - NIndexedReader::TSortableBatchPosition pos(MainPortion->IndexKeyStart().ToBatch(primaryKeysSchema), 0, primaryKeysSchema->field_names(), {}, false); + NArrow::NMerger::TSortableBatchPosition pos(MainPortion->IndexKeyStart().ToBatch(primaryKeysSchema), 0, primaryKeysSchema->field_names(), {}, false); result->AddCheckPoint(pos, true, false); } if (!nextBorder && MainPortion) { - NIndexedReader::TSortableBatchPosition pos(MainPortion->IndexKeyEnd().ToBatch(primaryKeysSchema), 0, primaryKeysSchema->field_names(), {}, false); + NArrow::NMerger::TSortableBatchPosition pos(MainPortion->IndexKeyEnd().ToBatch(primaryKeysSchema), 0, primaryKeysSchema->field_names(), {}, false); result->AddCheckPoint(pos, true, false); } if (stopPoint) { - NIndexedReader::TSortableBatchPosition pos(stopPoint->ToBatch(primaryKeysSchema), 0, primaryKeysSchema->field_names(), {}, false); + NArrow::NMerger::TSortableBatchPosition pos(stopPoint->ToBatch(primaryKeysSchema), 0, primaryKeysSchema->field_names(), {}, false); result->AddCheckPoint(pos, false, false); } return result; @@ -800,6 +871,7 @@ class TPortionsBucket: public TMoveOnly { void Actualize(const TInstant currentInstant) { auto gChartsThis = StartModificationGuard(); Others.Actualize(currentInstant); + RebuildOptimizedFeature(currentInstant); } void SplitOthersWith(TPortionsBucket& dest) { @@ -838,7 +910,7 @@ class TPortionBuckets { } void RemoveBucketFromRating(const std::shared_ptr& bucket) { - auto it = BucketsByWeight.find(bucket->GetWeight()); + auto it = BucketsByWeight.find(bucket->GetLastWeight()); AFL_VERIFY(it != BucketsByWeight.end()); 
AFL_VERIFY(it->second.erase(bucket.get())); if (it->second.empty()) { @@ -917,6 +989,17 @@ class TPortionBuckets { AddBucketToRating(LeftBucket); } + bool IsLocked(const std::shared_ptr& dataLocksManager) const { + if (BucketsByWeight.empty()) { + return false; + } + if (BucketsByWeight.rbegin()->second.empty()) { + return false; + } + const TPortionsBucket* bucketForOptimization = *BucketsByWeight.rbegin()->second.begin(); + return bucketForOptimization->IsLocked(dataLocksManager); + } + bool IsEmpty() const { return Buckets.empty() && LeftBucket->IsEmpty(); } @@ -944,7 +1027,7 @@ class TPortionBuckets { } void RemovePortion(const std::shared_ptr& portion) { - if (portion->GetBlobBytes() < SmallPortionDetectSizeLimit) { + if (portion->GetTotalBlobBytes() < NYDBTest::TControllers::GetColumnShardController()->GetSmallPortionSizeDetector(SmallPortionDetectSizeLimit)) { Counters->SmallPortions->RemovePortion(portion); } if (!RemoveBucket(portion)) { @@ -952,34 +1035,33 @@ class TPortionBuckets { } } - std::shared_ptr BuildOptimizationTask(const TCompactionLimits& limits, std::shared_ptr granule, const THashSet& busyPortions) const { + std::shared_ptr BuildOptimizationTask(std::shared_ptr granule, const std::shared_ptr& locksManager) const { AFL_VERIFY(BucketsByWeight.size()); if (!BucketsByWeight.rbegin()->first) { return nullptr; + } + AFL_VERIFY(BucketsByWeight.rbegin()->second.size()); + const TPortionsBucket* bucketForOptimization = *BucketsByWeight.rbegin()->second.begin(); + if (bucketForOptimization == LeftBucket.get()) { + if (Buckets.size()) { + return bucketForOptimization->BuildOptimizationTask(granule, locksManager, &Buckets.begin()->first, PrimaryKeysSchema, StoragesManager); + } else { + return bucketForOptimization->BuildOptimizationTask(granule, locksManager, nullptr, PrimaryKeysSchema, StoragesManager); + } } else { - AFL_VERIFY(BucketsByWeight.rbegin()->second.size()); - const TPortionsBucket* bucketForOptimization = 
*BucketsByWeight.rbegin()->second.begin(); - if (bucketForOptimization == LeftBucket.get()) { - if (Buckets.size()) { - return bucketForOptimization->BuildOptimizationTask(limits, granule, busyPortions, &Buckets.begin()->first, PrimaryKeysSchema, StoragesManager); - } else { - return bucketForOptimization->BuildOptimizationTask(limits, granule, busyPortions, nullptr, PrimaryKeysSchema, StoragesManager); - } + auto it = Buckets.find(bucketForOptimization->GetPortion()->IndexKeyStart()); + AFL_VERIFY(it != Buckets.end()); + ++it; + if (it != Buckets.end()) { + return bucketForOptimization->BuildOptimizationTask(granule, locksManager, &it->first, PrimaryKeysSchema, StoragesManager); } else { - auto it = Buckets.find(bucketForOptimization->GetPortion()->IndexKeyStart()); - AFL_VERIFY(it != Buckets.end()); - ++it; - if (it != Buckets.end()) { - return bucketForOptimization->BuildOptimizationTask(limits, granule, busyPortions, &it->first, PrimaryKeysSchema, StoragesManager); - } else { - return bucketForOptimization->BuildOptimizationTask(limits, granule, busyPortions, nullptr, PrimaryKeysSchema, StoragesManager); - } + return bucketForOptimization->BuildOptimizationTask(granule, locksManager, nullptr, PrimaryKeysSchema, StoragesManager); } } - } + } void AddPortion(const std::shared_ptr& portion, const TInstant now) { - if (portion->GetBlobBytes() < SmallPortionDetectSizeLimit) { + if (portion->GetTotalBlobBytes() < NYDBTest::TControllers::GetColumnShardController()->GetSmallPortionSizeDetector(SmallPortionDetectSizeLimit)) { Counters->SmallPortions->AddPortion(portion); AddOther(portion, now); return; @@ -994,8 +1076,14 @@ class TPortionBuckets { } else { if (itFrom == Buckets.end()) { const TDuration freshness = now - TInstant::MilliSeconds(portion->RecordSnapshotMax().GetPlanStep()); - if (freshness < GetCommonFreshnessCheckDuration() || portion->GetMeta().GetProduced() == NPortion::EProduced::INSERTED) { - AddOther(portion, now); + if 
(Y_LIKELY(!NYDBTest::TControllers::GetColumnShardController()->NeedForceCompactionBacketsConstruction())) { + if (freshness < GetCommonFreshnessCheckDuration() || portion->GetMeta().GetProduced() == NPortion::EProduced::INSERTED) { + AddOther(portion, now); + return; + } + } + if (Buckets.empty()) { + AddBucket(portion); return; } } @@ -1008,10 +1096,10 @@ class TPortionBuckets { } } - std::vector GetBucketPositions() const { - std::vector result; + std::vector GetBucketPositions() const { + std::vector result; for (auto&& i : Buckets) { - NIndexedReader::TSortableBatchPosition pos(i.second->GetPortion()->IndexKeyStart().ToBatch(PrimaryKeysSchema), 0, PrimaryKeysSchema->field_names(), {}, false); + NArrow::NMerger::TSortableBatchPosition pos(i.second->GetPortion()->IndexKeyStart().ToBatch(PrimaryKeysSchema), 0, PrimaryKeysSchema->field_names(), {}, false); result.emplace_back(pos); } return result; @@ -1025,6 +1113,10 @@ class TOptimizerPlanner: public IOptimizerPlanner { TPortionBuckets Buckets; const std::shared_ptr StoragesManager; protected: + virtual bool DoIsLocked(const std::shared_ptr& dataLocksManager) const override { + return Buckets.IsLocked(dataLocksManager); + } + virtual void DoModifyPortions(const THashMap>& add, const THashMap>& remove) override { const TInstant now = TInstant::Now(); for (auto&& [_, i] : remove) { @@ -1046,13 +1138,14 @@ class TOptimizerPlanner: public IOptimizerPlanner { Buckets.AddPortion(i, now); } } - virtual std::shared_ptr DoGetOptimizationTask(const TCompactionLimits& limits, std::shared_ptr granule, const THashSet& busyPortions) const override { - return Buckets.BuildOptimizationTask(limits, granule, busyPortions); + virtual std::shared_ptr DoGetOptimizationTask(std::shared_ptr granule, const std::shared_ptr& locksManager) const override { + return Buckets.BuildOptimizationTask(granule, locksManager); } virtual void DoActualize(const TInstant currentInstant) override { Buckets.Actualize(currentInstant); } + virtual 
TOptimizationPriority DoGetUsefulMetric() const override { if (Buckets.GetWeight()) { return TOptimizationPriority::Critical(Buckets.GetWeight()); @@ -1067,7 +1160,7 @@ class TOptimizerPlanner: public IOptimizerPlanner { return Buckets.SerializeToJson(); } public: - virtual std::vector GetBucketPositions() const override { + virtual std::vector GetBucketPositions() const override { return Buckets.GetBucketPositions(); } diff --git a/ydb/core/tx/columnshard/engines/storage/optimizer/levels/counters.cpp b/ydb/core/tx/columnshard/engines/storage/optimizer/levels/counters.cpp deleted file mode 100644 index 6004ce09169f..000000000000 --- a/ydb/core/tx/columnshard/engines/storage/optimizer/levels/counters.cpp +++ /dev/null @@ -1,5 +0,0 @@ -#include "counters.h" - -namespace NKikimr::NOlap::NStorageOptimizer::NLevels { - -} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/storage/optimizer/levels/counters.h b/ydb/core/tx/columnshard/engines/storage/optimizer/levels/counters.h deleted file mode 100644 index c0c3e2ca3d2f..000000000000 --- a/ydb/core/tx/columnshard/engines/storage/optimizer/levels/counters.h +++ /dev/null @@ -1,107 +0,0 @@ -#pragma once -#include -#include -#include -#include -#include - -namespace NKikimr::NOlap::NStorageOptimizer::NLevels { - -class TGlobalCounters: public NColumnShard::TCommonCountersOwner { -private: - using TBase = NColumnShard::TCommonCountersOwner; - NMonitoring::TDynamicCounters::TCounterPtr SmallPortionsCount; - std::shared_ptr SmallPortionsCountByGranule; - - std::shared_ptr CriticalRecordsCount; - std::shared_ptr NormalRecordsCount; - - std::shared_ptr OldestCriticalActuality; -public: - TGlobalCounters() - : TBase("LevelsStorageOptimizer") - { - SmallPortionsCount = TBase::GetValue("SmallPortions/Count"); - CriticalRecordsCount = TBase::GetValueAutoAggregations("Granule/CriticalRecord/Count"); - NormalRecordsCount = TBase::GetValueAutoAggregations("Granule/NormalRecord/Count"); - OldestCriticalActuality = 
TBase::GetValueAutoAggregations("Granule/ActualityMs"); - SmallPortionsCountByGranule = TBase::GetValueAutoAggregations("Granule/SmallPortions/Count"); - } - - static std::shared_ptr BuildOldestCriticalActualityAggregation() { - return Singleton()->OldestCriticalActuality->GetClient(); - } - - static std::shared_ptr BuildClientSmallPortionsAggregation() { - return Singleton()->SmallPortionsCountByGranule->GetClient(); - } - - static std::shared_ptr BuildSmallPortionsGuard() { - return std::make_shared(Singleton()->SmallPortionsCount); - } - - static std::shared_ptr BuildCriticalRecordsCountAggregation() { - return Singleton()->CriticalRecordsCount->GetClient(); - } - - static std::shared_ptr BuildNormalRecordsCountAggregation() { - return Singleton()->NormalRecordsCount->GetClient(); - } - -}; - -class TCounters { -private: - std::shared_ptr CriticalRecordsCount; - std::shared_ptr NormalRecordsCount; - - std::shared_ptr OldestCriticalActuality; - - std::shared_ptr SmallPortionsCount; - std::shared_ptr SmallPortionsByGranule; -public: - i64 GetSmallCounts() const { - return SmallPortionsByGranule->GetValueSimple(); - } - - TCounters() { - CriticalRecordsCount = TGlobalCounters::BuildCriticalRecordsCountAggregation(); - NormalRecordsCount = TGlobalCounters::BuildNormalRecordsCountAggregation(); - SmallPortionsCount = TGlobalCounters::BuildSmallPortionsGuard(); - SmallPortionsByGranule = TGlobalCounters::BuildClientSmallPortionsAggregation(); - OldestCriticalActuality = TGlobalCounters::BuildOldestCriticalActualityAggregation(); - } - - void OnMinProblemSnapshot(const TDuration d) { - OldestCriticalActuality->SetValue(d.MilliSeconds(), TInstant::Now() + TDuration::Seconds(10)); - } - - void OnAddCriticalCount(const ui32 count) { - CriticalRecordsCount->Add(count); - } - - void OnAddNormalCount(const ui32 count) { - NormalRecordsCount->Add(count); - } - - void OnRemoveCriticalCount(const ui32 count) { - CriticalRecordsCount->Remove(count); - } - - void 
OnRemoveNormalCount(const ui32 count) { - NormalRecordsCount->Remove(count); - } - - void OnAddSmallPortion() { - SmallPortionsCount->Add(1); - SmallPortionsByGranule->Add(1); - } - - void OnRemoveSmallPortion() { - SmallPortionsCount->Sub(1); - SmallPortionsByGranule->Remove(1); - } - -}; - -} diff --git a/ydb/core/tx/columnshard/engines/storage/optimizer/levels/optimizer.cpp b/ydb/core/tx/columnshard/engines/storage/optimizer/levels/optimizer.cpp deleted file mode 100644 index c78ef905041a..000000000000 --- a/ydb/core/tx/columnshard/engines/storage/optimizer/levels/optimizer.cpp +++ /dev/null @@ -1,5 +0,0 @@ -#include "optimizer.h" - -namespace NKikimr::NOlap::NStorageOptimizer { - -} diff --git a/ydb/core/tx/columnshard/engines/storage/optimizer/levels/optimizer.h b/ydb/core/tx/columnshard/engines/storage/optimizer/levels/optimizer.h deleted file mode 100644 index f192ae5bb674..000000000000 --- a/ydb/core/tx/columnshard/engines/storage/optimizer/levels/optimizer.h +++ /dev/null @@ -1,523 +0,0 @@ -#pragma once -#include "counters.h" - -#include -#include -#include -#include -#include -#include - -#include -#include -#include - -namespace NKikimr::NOlap::NStorageOptimizer::NLevels { - -class TLevelInfo { -private: - THashMap Counters; - YDB_READONLY(i64, CriticalWeight, 0); - YDB_READONLY(i64, NormalizedWeight, 0); - THashSet PortionIds; - std::shared_ptr Signals; -public: - TLevelInfo(std::shared_ptr counters) - : Signals(counters) - { - - } - - void AddPortion(const std::shared_ptr& p, const ui32 refCount) { - if (p->GetBlobBytes() < (1 << 20)) { - Signals->OnAddSmallPortion(); - } - auto it = Counters.find(p->GetPortion()); - i64 refCountPred = 0; - if (it == Counters.end()) { - it = Counters.emplace(p->GetPortion(), refCount).first; - } else { - refCountPred = it->second; - it->second += refCount; - } - if (it->second == 1 && refCountPred == 0) { - NormalizedWeight += p->NumRows(); - Signals->OnAddNormalCount(p->NumRows()); - } else if (it->second >= 2 && 
refCountPred == 1) { - CriticalWeight += p->NumRows(); - Signals->OnAddCriticalCount(p->NumRows()); - - NormalizedWeight -= p->NumRows(); - Y_ABORT_UNLESS(NormalizedWeight >= 0); - - Signals->OnRemoveNormalCount(p->NumRows()); - } else if (it->second >= 2 && refCountPred == 0) { - CriticalWeight += p->NumRows(); - Signals->OnAddCriticalCount(p->NumRows()); - } else if (it->second >= 2 && refCountPred >= 2) { - } else { - Y_ABORT_UNLESS(false); - } - } - - void RemovePortion(const std::shared_ptr& p, const ui32 refCount) { - if (p->GetBlobBytes() < (1 << 20)) { - Signals->OnRemoveSmallPortion(); - } - auto it = Counters.find(p->GetPortion()); - Y_ABORT_UNLESS(it != Counters.end()); - const i64 refCountPred = it->second; - it->second -= refCount; - Y_ABORT_UNLESS(it->second >= 0); - if (it->second >= 2) { - } else if (it->second == 1) { - Y_ABORT_UNLESS(refCountPred >= 2); - CriticalWeight -= p->NumRows(); - Y_ABORT_UNLESS(CriticalWeight >= 0); - Signals->OnRemoveCriticalCount(p->NumRows()); - Y_ABORT_UNLESS(CriticalWeight >= 0); - NormalizedWeight += p->NumRows(); - Signals->OnAddNormalCount(p->NumRows()); - } else if (it->second == 0) { - if (refCountPred >= 2) { - Y_ABORT_UNLESS(refCountPred >= 2); - CriticalWeight -= p->NumRows(); - Y_ABORT_UNLESS(CriticalWeight >= 0); - Signals->OnRemoveCriticalCount(p->NumRows()); - } else if (refCountPred == 1) { - NormalizedWeight -= p->NumRows(); - Y_ABORT_UNLESS(NormalizedWeight >= 0); - Signals->OnRemoveNormalCount(p->NumRows()); - } else { - Y_ABORT_UNLESS(false); - } - Counters.erase(it); - } - } - -}; - -class TBorderPoint { -public: - using TBorderPortions = THashMap>; -private: - THashMap MiddleWeight; - YDB_READONLY_DEF(TBorderPortions, StartPortions); - YDB_READONLY_DEF(TBorderPortions, MiddlePortions); - YDB_READONLY_DEF(TBorderPortions, FinishPortions); - std::shared_ptr LevelInfo; -public: - void InitInternalPoint(const TBorderPoint& predPoint) { - Y_ABORT_UNLESS(predPoint.MiddleWeight.size() == 
predPoint.MiddlePortions.size()); - for (auto&& i : predPoint.MiddlePortions) { - auto it = predPoint.MiddleWeight.find(i.first); - if (it->second != 2) { - AddMiddle(i.second, 1); - } - } - } - - std::shared_ptr GetOnlyPortion() const { - Y_ABORT_UNLESS(MiddlePortions.size() == 1); - Y_ABORT_UNLESS(!IsCritical()); - return MiddlePortions.begin()->second; - } - - bool IsSmall() const { - if (!IsCritical() && MiddlePortions.size() == 1 && MiddlePortions.begin()->second->GetBlobBytes() < (1 << 20)) { - return true; - } - return false; - } - - bool IsCritical() const { - if (StartPortions.size() && FinishPortions.size()) { - return true; - } - if (MiddlePortions.size() > 1 || StartPortions.size() > 1 || FinishPortions.size() > 1) { - return true; - } - return false; - } - - TBorderPoint(const std::shared_ptr& info) - : LevelInfo(info) { - - } - - ~TBorderPoint() { - for (auto&& i : MiddlePortions) { - if (i.second->IndexKeyStart() == i.second->IndexKeyEnd()) { - LevelInfo->RemovePortion(i.second, 2); - } else { - LevelInfo->RemovePortion(i.second, 1); - } - } - } - - void AddStart(const std::shared_ptr& p) { - Y_ABORT_UNLESS(StartPortions.emplace(p->GetPortion(), p).second); - } - void RemoveStart(const std::shared_ptr& p) { - Y_ABORT_UNLESS(StartPortions.erase(p->GetPortion())); - } - - void AddMiddle(const std::shared_ptr& p, const ui32 portionCriticalWeight) { - Y_ABORT_UNLESS(MiddleWeight.emplace(p->GetPortion(), portionCriticalWeight).second); - Y_ABORT_UNLESS(MiddlePortions.emplace(p->GetPortion(), p).second); - LevelInfo->AddPortion(p, portionCriticalWeight); - } - void RemoveMiddle(const std::shared_ptr& p, const ui32 portionCriticalWeight) { - Y_ABORT_UNLESS(MiddleWeight.erase(p->GetPortion())); - Y_ABORT_UNLESS(MiddlePortions.erase(p->GetPortion())); - LevelInfo->RemovePortion(p, portionCriticalWeight); - } - - void AddFinish(const std::shared_ptr& p) { - Y_ABORT_UNLESS(FinishPortions.emplace(p->GetPortion(), p).second); - } - void RemoveFinish(const 
std::shared_ptr& p) { - Y_ABORT_UNLESS(FinishPortions.erase(p->GetPortion())); - } - - bool IsEmpty() const { - return StartPortions.empty() && FinishPortions.empty(); - } -}; - -class TPortionsPlacement { -private: - THashSet PortionIds; - std::map Borders; - std::shared_ptr LevelInfo; -public: - TPortionsPlacement(const std::shared_ptr& levelInfo) - : LevelInfo(levelInfo) - { - - } - - class TPortionsScanner { - private: - THashMap> CurrentPortions; - const THashSet& BusyPortions; - public: - - TPortionsScanner(const THashSet& busyPortions) - : BusyPortions(busyPortions) - { - - } - - const THashMap>& GetCurrentPortions() const { - return CurrentPortions; - } - - bool AddBorderPoint(const TBorderPoint& p, bool& hasBusy) { - hasBusy = false; - for (auto&& [_, portionInfo] : p.GetStartPortions()) { - if (BusyPortions.contains(portionInfo->GetAddress())) { - hasBusy = true; - continue; - } - AFL_VERIFY(CurrentPortions.emplace(portionInfo->GetPortion(), portionInfo).second); - } - - for (auto&& [_, portionInfo] : p.GetFinishPortions()) { - if (BusyPortions.contains(portionInfo->GetAddress())) { - continue; - } - AFL_VERIFY(CurrentPortions.erase(portionInfo->GetPortion())); - } - return CurrentPortions.size(); - } - }; - - enum class EChainProblem { - NoProblem, - SmallChunks, - MergeChunks - }; - - std::vector>> GetPortionsToCompact(const ui64 sizeLimit, const THashSet& busyPortions) const { - std::vector>> result; - THashSet readyPortionIds; - ui64 resultSize = 0; - - TPortionsScanner buffer(busyPortions); - THashMap> portionsCurrentChain; - ui64 chainSize = 0; - EChainProblem problemType = EChainProblem::NoProblem; - for (auto&& i : Borders) { - bool hasBusy = false; - if (!buffer.AddBorderPoint(i.second, hasBusy)) { - if (hasBusy && problemType == EChainProblem::SmallChunks) { - chainSize = 0; - portionsCurrentChain.clear(); - problemType = EChainProblem::NoProblem; - } else if (chainSize > (1 << 20)) { - resultSize += chainSize; - std::vector> chain; - for 
(auto&& i : portionsCurrentChain) { - chain.emplace_back(i.second); - } - result.emplace_back(chain); - chainSize = 0; - portionsCurrentChain.clear(); - problemType = EChainProblem::NoProblem; - } - } else { - if (buffer.GetCurrentPortions().size() > 1) { - problemType = EChainProblem::MergeChunks; - } else if (buffer.GetCurrentPortions().begin()->second->GetBlobBytes() < (1 << 20) && problemType == EChainProblem::NoProblem) { - problemType = EChainProblem::SmallChunks; - } - if (problemType != EChainProblem::NoProblem) { - for (auto&& i : buffer.GetCurrentPortions()) { - if (portionsCurrentChain.emplace(i.second->GetPortion(), i.second).second) { - chainSize += i.second->GetBlobBytes(); - } - } - } - } - if (resultSize + chainSize > sizeLimit) { - break; - } - } - if (portionsCurrentChain.size() > 1) { - std::vector> chain; - for (auto&& i : portionsCurrentChain) { - chain.emplace_back(i.second); - } - result.emplace_back(chain); - } - - return result; - } - - void RemovePortion(const std::shared_ptr& portion) { - Y_ABORT_UNLESS(PortionIds.erase(portion->GetPortion())); - auto itStart = Borders.find(portion->IndexKeyStart()); - AFL_VERIFY(itStart != Borders.end()); - auto itFinish = Borders.find(portion->IndexKeyEnd()); - AFL_VERIFY(itFinish != Borders.end()); - - itStart->second.RemoveStart(portion); - itFinish->second.RemoveFinish(portion); - if (itStart != itFinish) { - for (auto it = itStart; it != itFinish; ++it) { - it->second.RemoveMiddle(portion, 1); - } - if (itFinish->second.IsEmpty()) { - Y_ABORT_UNLESS(Borders.erase(portion->IndexKeyEnd())); - } - if (itStart->second.IsEmpty()) { - Y_ABORT_UNLESS(Borders.erase(portion->IndexKeyStart())); - } - } else { - itStart->second.RemoveMiddle(portion, 2); - if (itStart->second.IsEmpty()) { - Borders.erase(itStart); - } - } - } - - void AddPortion(const std::shared_ptr& portion) { - Y_ABORT_UNLESS(PortionIds.emplace(portion->GetPortion()).second); - auto itStartInfo = Borders.emplace(portion->IndexKeyStart(), 
TBorderPoint(LevelInfo)); - auto itStart = itStartInfo.first; - if (itStartInfo.second && itStart != Borders.begin()) { - auto itStartCopy = itStart; - --itStartCopy; - itStart->second.InitInternalPoint(itStartCopy->second); - } - auto itFinishInfo = Borders.emplace(portion->IndexKeyEnd(), TBorderPoint(LevelInfo)); - auto itFinish = itFinishInfo.first; - if (itFinishInfo.second) { - Y_ABORT_UNLESS(itFinish != Borders.begin()); - auto itFinishCopy = itFinish; - --itFinishCopy; - itFinish->second.InitInternalPoint(itFinishCopy->second); - } - - itStart->second.AddStart(portion); - itFinish->second.AddFinish(portion); - if (itStart != itFinish) { - for (auto it = itStart; it != itFinish; ++it) { - it->second.AddMiddle(portion, 1); - } - } else { - itStart->second.AddMiddle(portion, 2); - } - } -}; - -class TLevel { -private: - YDB_READONLY(TDuration, CriticalAge, TDuration::Zero()); - YDB_READONLY(ui64, CriticalSize, 0); - std::shared_ptr LevelInfo; - TPortionsPlacement PortionsPlacement; - std::shared_ptr NextLevel; - std::map Borders; - std::map>> PortionByAge; - const ui64 PortionsSizeLimit = (ui64)250 * 1024 * 1024; - TCompactionLimits CompactionLimits; - THashSet PortionIds; - const std::shared_ptr StoragesManager; - std::shared_ptr PrimaryKeysSchema; -public: - TLevel(const TDuration criticalAge, const ui64 criticalSize, std::shared_ptr nextLevel, const std::shared_ptr& storagesManager, std::shared_ptr counters, - const std::shared_ptr& primaryKeysSchema) - : CriticalAge(criticalAge) - , CriticalSize(criticalSize) - , LevelInfo(std::make_shared(counters)) - , PortionsPlacement(LevelInfo) - , NextLevel(nextLevel) - , StoragesManager(storagesManager) - , PrimaryKeysSchema(primaryKeysSchema) - { - CompactionLimits.GranuleSizeForOverloadPrevent = CriticalSize * 0.5; - } - - ui64 GetWeight() const { - return LevelInfo->GetCriticalWeight(); - } - - void ProvidePortionsNextLevel(const TInstant currentInstant) { - if (!NextLevel) { - return; - } - std::vector> 
portionsForProviding; - for (auto&& i : PortionByAge) { - if (TInstant::MilliSeconds(i.first.GetPlanStep()) + CriticalAge < currentInstant) { - for (auto&& p : i.second) { - portionsForProviding.emplace_back(p.second); - } - } else { - break; - } - } - for (auto&& i : portionsForProviding) { - RemovePortion(i, currentInstant); - NextLevel->AddPortion(i, currentInstant); - } - } - - void AddPortion(const std::shared_ptr& portionInfo, const TInstant addInstant) { - if (TInstant::MilliSeconds(portionInfo->RecordSnapshotMax().GetPlanStep()) + CriticalAge < addInstant) { - Y_ABORT_UNLESS(!PortionIds.contains(portionInfo->GetPortion())); - if (NextLevel) { - return NextLevel->AddPortion(portionInfo, addInstant); - } - } - PortionsPlacement.AddPortion(portionInfo); - Y_ABORT_UNLESS(PortionByAge[portionInfo->RecordSnapshotMax()].emplace(portionInfo->GetPortion(), portionInfo).second); - ProvidePortionsNextLevel(addInstant); - } - - void RemovePortion(const std::shared_ptr& portionInfo, const TInstant removeInstant) { - PortionsPlacement.RemovePortion(portionInfo); - { - auto it = PortionByAge.find(portionInfo->RecordSnapshotMax()); - Y_ABORT_UNLESS(it != PortionByAge.end()); - Y_ABORT_UNLESS(it->second.erase(portionInfo->GetPortion())); - if (it->second.empty()) { - PortionByAge.erase(it); - } - } - ProvidePortionsNextLevel(removeInstant); - } - - std::shared_ptr BuildOptimizationTask(const TCompactionLimits& /*limits*/, std::shared_ptr granule, const THashSet& busyPortions, const TInstant /*currentInstant*/) const { - std::vector>> portionGroups = PortionsPlacement.GetPortionsToCompact(PortionsSizeLimit, busyPortions); - if (portionGroups.empty()) { - AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "optimization_task_skipped"); - return nullptr; - } - std::vector> portions; - std::vector positions; - for (auto&& i : portionGroups) { - portions.insert(portions.end(), i.begin(), i.end()); - std::optional position; - for (auto&& p : i) { - 
NIndexedReader::TSortableBatchPosition pos(p->IndexKeyEnd().ToBatch(PrimaryKeysSchema), 0, PrimaryKeysSchema->field_names(), {}, false); - if (!position || position->Compare(pos) == std::partial_ordering::less) { - position = pos; - } - } - Y_ABORT_UNLESS(position); - positions.emplace_back(*position); - } - TSaverContext saverContext(StoragesManager->GetOperator(IStoragesManager::DefaultStorageId), StoragesManager); - auto result = std::make_shared(CompactionLimits.GetSplitSettings(), granule, portions, saverContext); - for (auto&& i : positions) { - result->AddCheckPoint(i); - } - return result; - } - -}; - -class TLevelsOptimizerPlanner: public IOptimizerPlanner { -private: - using TBase = IOptimizerPlanner; - std::shared_ptr L3; - std::shared_ptr LMax; - std::shared_ptr LStart; - const std::shared_ptr StoragesManager; - std::shared_ptr Counters; -protected: - virtual std::vector GetBucketPositions() const override { - return {}; - } - - virtual void DoModifyPortions(const THashMap>& add, const THashMap>& remove) override { - const TInstant currentInstant = TInstant::Now(); - for (auto&& [_, i] : remove) { - if (i->GetMeta().GetTierName() != IStoragesManager::DefaultStorageId && i->GetMeta().GetTierName() != "") { - continue; - } - if (!i->GetMeta().RecordSnapshotMax) { - LMax->RemovePortion(i, currentInstant); - } else { - LStart->RemovePortion(i, currentInstant); - } - } - for (auto&& [_, i] : add) { - if (i->GetMeta().GetTierName() != IStoragesManager::DefaultStorageId && i->GetMeta().GetTierName() != "") { - continue; - } - if (!i->GetMeta().RecordSnapshotMax) { - LMax->AddPortion(i, currentInstant); - } else { - LStart->AddPortion(i, currentInstant); - } - } - } - virtual std::shared_ptr DoGetOptimizationTask(const TCompactionLimits& limits, std::shared_ptr granule, const THashSet& busyPortions) const override { - return LStart->BuildOptimizationTask(limits, granule, busyPortions, TInstant::Now()); - - } - virtual TOptimizationPriority DoGetUsefulMetric() 
const override { - return TOptimizationPriority::Critical(LStart->GetWeight()); - } - virtual TString DoDebugString() const override { - return ""; - } - virtual void DoActualize(const TInstant /*currentInstant*/) override { - - } -public: - TLevelsOptimizerPlanner(const ui64 pathId, const std::shared_ptr& storagesManager, const std::shared_ptr& primaryKeysSchema) - : TBase(pathId) - , StoragesManager(storagesManager) - , Counters(std::make_shared()) - { - L3 = std::make_shared(TDuration::Seconds(120), 24 << 20, nullptr, StoragesManager, Counters, primaryKeysSchema); - LMax = L3; - LStart = L3; - } -}; - -} // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/storage/optimizer/levels/ya.make b/ydb/core/tx/columnshard/engines/storage/optimizer/levels/ya.make deleted file mode 100644 index 3f96a5717477..000000000000 --- a/ydb/core/tx/columnshard/engines/storage/optimizer/levels/ya.make +++ /dev/null @@ -1,15 +0,0 @@ -LIBRARY() - -SRCS( - optimizer.cpp - counters.cpp -) - -PEERDIR( - contrib/libs/apache/arrow - ydb/core/protos - ydb/core/formats/arrow - ydb/core/tx/columnshard/engines/changes/abstract -) - -END() diff --git a/ydb/core/tx/columnshard/engines/storage/optimizer/ya.make b/ydb/core/tx/columnshard/engines/storage/optimizer/ya.make index e1362859aaea..1d73127f26c2 100644 --- a/ydb/core/tx/columnshard/engines/storage/optimizer/ya.make +++ b/ydb/core/tx/columnshard/engines/storage/optimizer/ya.make @@ -2,8 +2,6 @@ LIBRARY() PEERDIR( ydb/core/tx/columnshard/engines/storage/optimizer/abstract - ydb/core/tx/columnshard/engines/storage/optimizer/intervals - ydb/core/tx/columnshard/engines/storage/optimizer/levels ydb/core/tx/columnshard/engines/storage/optimizer/lbuckets ) diff --git a/ydb/core/tx/columnshard/engines/storage/storage.cpp b/ydb/core/tx/columnshard/engines/storage/storage.cpp index c51c2bd6e166..2aa5c0af7244 100644 --- a/ydb/core/tx/columnshard/engines/storage/storage.cpp +++ b/ydb/core/tx/columnshard/engines/storage/storage.cpp 
@@ -1,37 +1,50 @@ #include "storage.h" +#include namespace NKikimr::NOlap { -void TGranulesStorage::UpdateGranuleInfo(const TGranuleMeta& granule) { - if (PackModificationFlag) { - PackModifiedGranules[granule.GetPathId()] = &granule; - return; - } -} - -std::shared_ptr TGranulesStorage::GetGranuleForCompaction(const THashMap>& granules, const THashSet& busyGranuleIds) const { - const TInstant now = TInstant::Now(); - std::optional priority; - std::shared_ptr granule; - for (auto&& i : granules) { +std::shared_ptr TGranulesStorage::GetGranuleForCompaction(const std::shared_ptr& dataLocksManager) const { + const TInstant now = HasAppData() ? AppDataVerified().TimeProvider->Now() : TInstant::Now(); + std::map> granulesSorted; + ui32 countChecker = 0; + std::optional priorityChecker; + for (auto&& i : Tables) { + NActors::TLogContextGuard lGuard = NActors::TLogContextBuilder::Build()("path_id", i.first); i.second->ActualizeOptimizer(now); - if (busyGranuleIds.contains(i.first)) { + auto gPriority = i.second->GetCompactionPriority(); + if (gPriority.IsZero() || (priorityChecker && gPriority < *priorityChecker)) { continue; } - if (!priority || *priority < i.second->GetCompactionPriority()) { - priority = i.second->GetCompactionPriority(); - granule = i.second; + granulesSorted.emplace(gPriority, i.second); + if (++countChecker % 100 == 0) { + for (auto&& it = granulesSorted.rbegin(); it != granulesSorted.rend(); ++it) { + if (!it->second->IsLockedOptimizer(dataLocksManager)) { + priorityChecker = it->first; + break; + } + } } } - if (!priority) { + if (granulesSorted.empty()) { AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "no_granules"); return nullptr; } - if (priority->IsZero()) { - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "zero_priority"); - return nullptr; + for (auto&& it = granulesSorted.rbegin(); it != granulesSorted.rend(); ++it) { + if (priorityChecker && it->first < *priorityChecker) { + continue; + } + NActors::TLogContextGuard lGuard = 
NActors::TLogContextBuilder::Build()("path_id", it->second->GetPathId()); + if (it->second->IsLockedOptimizer(dataLocksManager)) { + Counters.OnGranuleOptimizerLocked(); + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "skip_optimizer_throught_lock")("priority", it->first.DebugString()); + } else { + AFL_NOTICE(NKikimrServices::TX_COLUMNSHARD)("event", "granule_compaction_weight")("priority", it->first.DebugString()); + return it->second; + } } - return granule; + + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "all_significant_granules_locked")("count", granulesSorted.size()); + return nullptr; } } // namespace NKikimr::NOlap diff --git a/ydb/core/tx/columnshard/engines/storage/storage.h b/ydb/core/tx/columnshard/engines/storage/storage.h index 7ddba8009f6f..b58d61a80c26 100644 --- a/ydb/core/tx/columnshard/engines/storage/storage.h +++ b/ydb/core/tx/columnshard/engines/storage/storage.h @@ -6,13 +6,15 @@ namespace NKikimr::NOlap { -class TGranulesStorage { +class TGranulesStat { private: - const TCompactionLimits Limits; + i64 MetadataMemoryPortionsSize = 0; const NColumnShard::TEngineLogsCounters Counters; - std::shared_ptr StoragesManager; bool PackModificationFlag = false; THashMap PackModifiedGranules; + + static inline TAtomicCounter SumMetadataMemoryPortionsSize = 0; + void StartModificationImpl() { Y_ABORT_UNLESS(!PackModificationFlag); PackModificationFlag = true; @@ -28,27 +30,21 @@ class TGranulesStorage { } public: - TGranulesStorage(const NColumnShard::TEngineLogsCounters counters, const TCompactionLimits& limits, const std::shared_ptr& storagesManager) - : Limits(limits) - , Counters(counters) - , StoragesManager(storagesManager) + TGranulesStat(const NColumnShard::TEngineLogsCounters& counters) + : Counters(counters) { } - const std::shared_ptr& GetStoragesManager() const { - return StoragesManager; - } - const NColumnShard::TEngineLogsCounters& GetCounters() const { return Counters; } class TModificationGuard: TNonCopyable { private: - 
TGranulesStorage& Owner; + TGranulesStat& Owner; public: - TModificationGuard(TGranulesStorage& storage) + TModificationGuard(TGranulesStat& storage) : Owner(storage) { Owner.StartModificationImpl(); } @@ -62,9 +58,123 @@ class TGranulesStorage { return TModificationGuard(*this); } - std::shared_ptr GetGranuleForCompaction(const THashMap>& granules, const THashSet& busyGranuleIds) const; + static ui64 GetSumMetadataMemoryPortionsSize() { + return SumMetadataMemoryPortionsSize.Val(); + } + + i64 GetMetadataMemoryPortionsSize() const { + return MetadataMemoryPortionsSize; + } + + ~TGranulesStat() { + SumMetadataMemoryPortionsSize.Sub(MetadataMemoryPortionsSize); + } + + void UpdateGranuleInfo(const TGranuleMeta& granule) { + if (PackModificationFlag) { + PackModifiedGranules[granule.GetPathId()] = &granule; + return; + } + } + + void OnRemovePortion(const TPortionInfo& portion) { + MetadataMemoryPortionsSize -= portion.GetMetadataMemorySize(); + AFL_VERIFY(MetadataMemoryPortionsSize >= 0); + const i64 value = SumMetadataMemoryPortionsSize.Sub(portion.GetMetadataMemorySize()); + Counters.OnIndexMetadataUsageBytes(value); + } + + void OnAddPortion(const TPortionInfo& portion) { + MetadataMemoryPortionsSize += portion.GetMetadataMemorySize(); + const i64 value = SumMetadataMemoryPortionsSize.Add(portion.GetMetadataMemorySize()); + Counters.OnIndexMetadataUsageBytes(value); + } + +}; + +class TGranulesStorage { +private: + const NColumnShard::TEngineLogsCounters Counters; + std::shared_ptr StoragesManager; + THashMap> Tables; // pathId into Granule that equal to Table + std::shared_ptr Stats; +public: + TGranulesStorage(const NColumnShard::TEngineLogsCounters counters, const std::shared_ptr& storagesManager) + : Counters(counters) + , StoragesManager(storagesManager) + , Stats(std::make_shared(Counters)) + { + + } + + const std::shared_ptr& GetStats() const { + return Stats; + } + + std::shared_ptr RegisterTable(const ui64 pathId, const 
NColumnShard::TGranuleDataCounters& counters, const TVersionedIndex& versionedIndex) { + auto infoEmplace = Tables.emplace(pathId, std::make_shared(pathId, *this, counters, versionedIndex)); + AFL_VERIFY(infoEmplace.second); + return infoEmplace.first->second; + } + + void EraseTable(const ui64 pathId) { + auto it = Tables.find(pathId); + Y_ABORT_UNLESS(it != Tables.end()); + Y_ABORT_UNLESS(it->second->IsErasable()); + Tables.erase(it); + } + + const THashMap>& GetTables() const { + return Tables; + } + + void ReturnToIndexes(const THashMap>& portions) const { + for (auto&& [g, portionIds] : portions) { + auto it = Tables.find(g); + AFL_VERIFY(it != Tables.end()); + it->second->ReturnToIndexes(portionIds); + } + } + + std::vector> GetTables(const std::optional pathIdFrom, const std::optional pathIdTo) const { + std::vector> result; + for (auto&& i : Tables) { + if (pathIdFrom && i.first < *pathIdFrom) { + continue; + } + if (pathIdTo && i.first > *pathIdTo) { + continue; + } + result.emplace_back(i.second); + } + return result; + } + + std::shared_ptr GetPortionOptional(const ui64 pathId, const ui64 portionId) const { + auto it = Tables.find(pathId); + if (it == Tables.end()) { + return nullptr; + } + return it->second->GetPortionOptional(portionId); + } + + std::shared_ptr GetGranuleOptional(const ui64 pathId) const { + auto it = Tables.find(pathId); + if (it == Tables.end()) { + return nullptr; + } + return it->second; + } + + const std::shared_ptr& GetStoragesManager() const { + return StoragesManager; + } + + const NColumnShard::TEngineLogsCounters& GetCounters() const { + return Counters; + } - void UpdateGranuleInfo(const TGranuleMeta& granule); + std::shared_ptr GetGranuleForCompaction(const std::shared_ptr& locksManager) const; }; diff --git a/ydb/core/tx/columnshard/engines/storage/ya.make b/ydb/core/tx/columnshard/engines/storage/ya.make index 811707b20d2d..2f1af88b17a7 100644 --- a/ydb/core/tx/columnshard/engines/storage/ya.make +++ 
b/ydb/core/tx/columnshard/engines/storage/ya.make @@ -9,6 +9,8 @@ PEERDIR( contrib/libs/apache/arrow ydb/core/protos ydb/core/tx/columnshard/engines/storage/optimizer + ydb/core/tx/columnshard/engines/storage/actualizer + ydb/core/tx/columnshard/engines/storage/chunks ydb/core/formats/arrow ) diff --git a/ydb/core/tx/columnshard/engines/ut/ut_insert_table.cpp b/ydb/core/tx/columnshard/engines/ut/ut_insert_table.cpp index 9e2dad244ac6..8362ee918b85 100644 --- a/ydb/core/tx/columnshard/engines/ut/ut_insert_table.cpp +++ b/ydb/core/tx/columnshard/engines/ut/ut_insert_table.cpp @@ -47,7 +47,7 @@ Y_UNIT_TEST_SUITE(TColumnEngineTestInsertTable) { ui64 writeId = 0; ui64 tableId = 0; TString dedupId = "0"; - TUnifiedBlobId blobId1(2222, 1, 1, 100, 1); + TUnifiedBlobId blobId1(2222, 1, 1, 100, 2, 0, 1); TTestInsertTableDB dbTable; TInsertTable insertTable; @@ -62,7 +62,7 @@ Y_UNIT_TEST_SUITE(TColumnEngineTestInsertTable) { UNIT_ASSERT(!ok); // insert different blodId with the same writeId and dedupId - TUnifiedBlobId blobId2(2222, 1, 2, 100, 1); + TUnifiedBlobId blobId2(2222, 1, 2, 100, 2, 0, 1); ok = insertTable.Insert(dbTable, TInsertedData(writeId, tableId, dedupId, blobId2, TLocalHelper::GetMetaProto(), indexSnapshot, {})); UNIT_ASSERT(!ok); diff --git a/ydb/core/tx/columnshard/engines/ut/ut_logs_engine.cpp b/ydb/core/tx/columnshard/engines/ut/ut_logs_engine.cpp index a5c07fcc014c..0770a69d456d 100644 --- a/ydb/core/tx/columnshard/engines/ut/ut_logs_engine.cpp +++ b/ydb/core/tx/columnshard/engines/ut/ut_logs_engine.cpp @@ -2,13 +2,19 @@ #include #include #include -#include +#include +#include +#include #include #include #include #include - +#include +#include +#include +#include +#include namespace NKikimr { @@ -21,6 +27,8 @@ using namespace NKikimr::NOlap::NEngines::NTest; namespace { +std::shared_ptr EmptyDataLocksManager = std::make_shared(); + class TTestDbWrapper : public IDbWrapper { private: std::map> LoadContexts; @@ -79,19 +87,23 @@ class TTestDbWrapper : 
public IDbWrapper { } auto& data = Indices[0].Columns[portion.GetPathId()]; - NOlap::TColumnChunkLoadContext loadContext(row.GetAddress(), row.BlobRange, rowProto); + NOlap::TColumnChunkLoadContext loadContext(row.GetAddress(), portion.RestoreBlobRange(row.BlobRange), rowProto); auto itInsertInfo = LoadContexts[portion.GetAddress()].emplace(row.GetAddress(), loadContext); if (!itInsertInfo.second) { itInsertInfo.first->second = loadContext; } auto it = data.find(portion.GetPortion()); if (it == data.end()) { - it = data.emplace(portion.GetPortion(), portion.CopyWithFilteredColumns({})).first; + it = data.emplace(portion.GetPortion(), portion.CopyBeforeChunksRebuild()).first; } else { Y_ABORT_UNLESS(portion.GetPathId() == it->second.GetPathId() && portion.GetPortion() == it->second.GetPortion()); } - it->second.SetMinSnapshot(portion.GetMinSnapshot()); - it->second.SetRemoveSnapshot(portion.GetRemoveSnapshot()); + it->second.SetMinSnapshotDeprecated(portion.GetMinSnapshotDeprecated()); + if (portion.HasRemoveSnapshot()) { + it->second.SetRemoveSnapshot(portion.GetRemoveSnapshotVerified()); + } else { + AFL_VERIFY(!it->second.HasRemoveSnapshot()); + } bool replaced = false; for (auto& rec : it->second.Records) { @@ -163,21 +175,21 @@ class TTestDbWrapper : public IDbWrapper { THashMap Indices; }; -static const std::vector> testColumns = { +static const std::vector testColumns = { // PK - {"timestamp", TTypeInfo(NTypeIds::Timestamp) }, - {"resource_type", TTypeInfo(NTypeIds::Utf8) }, - {"resource_id", TTypeInfo(NTypeIds::Utf8) }, - {"uid", TTypeInfo(NTypeIds::Utf8) }, + NArrow::NTest::TTestColumn("timestamp", TTypeInfo(NTypeIds::Timestamp) ), + NArrow::NTest::TTestColumn("resource_type", TTypeInfo(NTypeIds::Utf8) ), + NArrow::NTest::TTestColumn("resource_id", TTypeInfo(NTypeIds::Utf8) ), + NArrow::NTest::TTestColumn("uid", TTypeInfo(NTypeIds::Utf8) ), // - {"message", TTypeInfo(NTypeIds::Utf8) } + NArrow::NTest::TTestColumn("message", TTypeInfo(NTypeIds::Utf8) ) }; 
-static const std::vector> testKey = { - {"timestamp", TTypeInfo(NTypeIds::Timestamp) }, - {"resource_type", TTypeInfo(NTypeIds::Utf8) }, - {"resource_id", TTypeInfo(NTypeIds::Utf8) }, - {"uid", TTypeInfo(NTypeIds::Utf8) } +static const std::vector testKey = { + NArrow::NTest::TTestColumn("timestamp", TTypeInfo(NTypeIds::Timestamp) ), + NArrow::NTest::TTestColumn("resource_type", TTypeInfo(NTypeIds::Utf8) ), + NArrow::NTest::TTestColumn("resource_id", TTypeInfo(NTypeIds::Utf8) ), + NArrow::NTest::TTestColumn("uid", TTypeInfo(NTypeIds::Utf8) ) }; template @@ -243,25 +255,18 @@ TString MakeTestBlob(i64 start = 0, i64 end = 100) { return NArrow::SerializeBatchNoCompression(batch); } -void AddIdsToBlobs(std::vector& portions, THashMap& blobs, ui32& step) { +void AddIdsToBlobs(std::vector& portions, NBlobOperations::NRead::TCompositeReadBlobs& blobs, ui32& step) { for (auto& portion : portions) { for (auto& rec : portion.GetPortionInfo().Records) { - rec.BlobRange.BlobId = MakeUnifiedBlobId(++step, portion.GetBlobFullSizeVerified(rec.ColumnId, rec.Chunk)); - blobs[rec.BlobRange] = portion.GetBlobByRangeVerified(rec.ColumnId, rec.Chunk); + rec.BlobRange.BlobIdx = portion.GetPortionInfo().RegisterBlobId(MakeUnifiedBlobId(++step, portion.GetBlobFullSizeVerified(rec.ColumnId, rec.Chunk))); + TString data = portion.GetBlobByRangeVerified(rec.ColumnId, rec.Chunk); + blobs.Add(IStoragesManager::DefaultStorageId, portion.GetPortionInfo().RestoreBlobRange(rec.BlobRange), std::move(data)); } } } -TCompactionLimits TestLimits() { - TCompactionLimits limits; - limits.GranuleBlobSplitSize = 1024; - limits.GranuleSizeForOverloadPrevent = 400 * 1024; - limits.GranuleOverloadSize = 800 * 1024; - return limits; -} - bool Insert(TColumnEngineForLogs& engine, TTestDbWrapper& db, TSnapshot snap, - std::vector&& dataToIndex, THashMap& blobs, ui32& step) { + std::vector&& dataToIndex, NBlobOperations::NRead::TCompositeReadBlobs& blobs, ui32& step) { for (ui32 i = 0; i < dataToIndex.size(); 
++i) { // Commited data always has nonzero planstep (for WriteLoadRead tests) @@ -272,11 +277,11 @@ bool Insert(TColumnEngineForLogs& engine, TTestDbWrapper& db, TSnapshot snap, return false; } - changes->Blobs.insert(blobs.begin(), blobs.end()); - blobs.clear(); + changes->Blobs = std::move(blobs); + blobs.Clear(); changes->StartEmergency(); - NOlap::TConstructionContext context(engine.GetVersionedIndex(), NColumnShard::TIndexationCounters("Indexation")); + NOlap::TConstructionContext context(engine.GetVersionedIndex(), NColumnShard::TIndexationCounters("Indexation"), snap); Y_ABORT_UNLESS(changes->ConstructBlobs(context).Ok()); UNIT_ASSERT_VALUES_EQUAL(changes->AppendedPortions.size(), 1); @@ -289,7 +294,12 @@ bool Insert(TColumnEngineForLogs& engine, TTestDbWrapper& db, TSnapshot snap, AddIdsToBlobs(changes->AppendedPortions, blobs, step); const bool result = engine.ApplyChanges(db, changes, snap); - changes->AbortEmergency(); + + NOlap::TWriteIndexContext contextExecute(nullptr, db, engine); + changes->WriteIndexOnExecute(nullptr, contextExecute); + NOlap::TWriteIndexCompleteContext contextComplete(NActors::TActivationContext::AsActorContext(), 0, 0, TDuration::Zero(), engine); + changes->WriteIndexOnComplete(nullptr, contextComplete); + changes->AbortEmergency("testing"); return result; } @@ -299,14 +309,14 @@ struct TExpected { ui32 NewGranules; }; -bool Compact(TColumnEngineForLogs& engine, TTestDbWrapper& db, TSnapshot snap, THashMap&& blobs, ui32& step, +bool Compact(TColumnEngineForLogs& engine, TTestDbWrapper& db, TSnapshot snap, NBlobOperations::NRead::TCompositeReadBlobs&& blobs, ui32& step, const TExpected& /*expected*/, THashMap* blobsPool = nullptr) { - std::shared_ptr changes = dynamic_pointer_cast(engine.StartCompaction(TestLimits(), {})); + std::shared_ptr changes = dynamic_pointer_cast(engine.StartCompaction(EmptyDataLocksManager)); UNIT_ASSERT(changes); // UNIT_ASSERT_VALUES_EQUAL(changes->SwitchedPortions.size(), expected.SrcPortions); - 
changes->SetBlobs(std::move(blobs)); + changes->Blobs = std::move(blobs); changes->StartEmergency(); - NOlap::TConstructionContext context(engine.GetVersionedIndex(), NColumnShard::TIndexationCounters("Compaction")); + NOlap::TConstructionContext context(engine.GetVersionedIndex(), NColumnShard::TIndexationCounters("Compaction"), NOlap::TSnapshot(step, 1)); Y_ABORT_UNLESS(changes->ConstructBlobs(context).Ok()); // UNIT_ASSERT_VALUES_EQUAL(changes->AppendedPortions.size(), expected.NewPortions); @@ -315,20 +325,24 @@ bool Compact(TColumnEngineForLogs& engine, TTestDbWrapper& db, TSnapshot snap, T // UNIT_ASSERT_VALUES_EQUAL(changes->GetTmpGranuleIds().size(), expected.NewGranules); const bool result = engine.ApplyChanges(db, changes, snap); + NOlap::TWriteIndexContext contextExecute(nullptr, db, engine); + changes->WriteIndexOnExecute(nullptr, contextExecute); + NOlap::TWriteIndexCompleteContext contextComplete(NActors::TActivationContext::AsActorContext(), 0, 0, TDuration::Zero(), engine); + changes->WriteIndexOnComplete(nullptr, contextComplete); if (blobsPool) { for (auto&& i : changes->AppendedPortions) { for (auto&& r : i.GetPortionInfo().Records) { - Y_ABORT_UNLESS(blobsPool->emplace(r.BlobRange, i.GetBlobByRangeVerified(r.ColumnId, r.Chunk)).second); + Y_ABORT_UNLESS(blobsPool->emplace(i.GetPortionInfo().RestoreBlobRange(r.BlobRange), i.GetBlobByRangeVerified(r.ColumnId, r.Chunk)).second); } } } - changes->AbortEmergency(); + changes->AbortEmergency("testing"); return result; } bool Cleanup(TColumnEngineForLogs& engine, TTestDbWrapper& db, TSnapshot snap, ui32 expectedToDrop) { THashSet pathsToDrop; - std::shared_ptr changes = engine.StartCleanup(snap, pathsToDrop, 1000); + std::shared_ptr changes = engine.StartCleanupPortions(snap, pathsToDrop, EmptyDataLocksManager); UNIT_ASSERT(changes || !expectedToDrop); if (!expectedToDrop && !changes) { return true; @@ -338,20 +352,29 @@ bool Cleanup(TColumnEngineForLogs& engine, TTestDbWrapper& db, TSnapshot snap, u 
changes->StartEmergency(); const bool result = engine.ApplyChanges(db, changes, snap); - changes->AbortEmergency(); + NOlap::TWriteIndexContext contextExecute(nullptr, db, engine); + changes->WriteIndexOnExecute(nullptr, contextExecute); + NOlap::TWriteIndexCompleteContext contextComplete(NActors::TActivationContext::AsActorContext(), 0, 0, TDuration::Zero(), engine); + changes->WriteIndexOnComplete(nullptr, contextComplete); + changes->AbortEmergency("testing"); return result; } bool Ttl(TColumnEngineForLogs& engine, TTestDbWrapper& db, const THashMap& pathEviction, ui32 expectedToDrop) { - std::shared_ptr changes = engine.StartTtl(pathEviction, {}, 512 * 1024 * 1024); - UNIT_ASSERT(changes); + std::vector> vChanges = engine.StartTtl(pathEviction, EmptyDataLocksManager, 512 * 1024 * 1024); + AFL_VERIFY(vChanges.size() == 1)("count", vChanges.size()); + auto changes = vChanges.front(); UNIT_ASSERT_VALUES_EQUAL(changes->PortionsToRemove.size(), expectedToDrop); changes->StartEmergency(); const bool result = engine.ApplyChanges(db, changes, TSnapshot(1,1)); - changes->AbortEmergency(); + NOlap::TWriteIndexContext contextExecute(nullptr, db, engine); + changes->WriteIndexOnExecute(nullptr, contextExecute); + NOlap::TWriteIndexCompleteContext contextComplete(NActors::TActivationContext::AsActorContext(), 0, 0, TDuration::Zero(), engine); + changes->WriteIndexOnComplete(nullptr, contextComplete); + changes->AbortEmergency("testing"); return result; } @@ -373,24 +396,15 @@ std::shared_ptr MakeStrPredicate(const std::string& key, NArrow::EOp } // namespace -class TTestStoragesManager: public NOlap::IStoragesManager { -private: - using TBase = NOlap::IStoragesManager; - TIntrusivePtr TabletInfo = new TTabletStorageInfo(); -protected: - virtual std::shared_ptr DoBuildOperator(const TString& storageId) override { - if (storageId == TBase::DefaultStorageId) { - return std::make_shared(storageId, NActors::TActorId(), TabletInfo, 1); - } else - return nullptr; - } -}; 
+std::shared_ptr InitializeStorageManager() { + return NKikimr::NOlap::TTestStoragesManager::GetInstance(); +} -std::shared_ptr CommonStoragesManager = std::make_shared(); +std::shared_ptr CommonStoragesManager = InitializeStorageManager(); Y_UNIT_TEST_SUITE(TColumnEngineTestLogs) { - void WriteLoadRead(const std::vector>& ydbSchema, - const std::vector>& key) { + void WriteLoadRead(const std::vector& ydbSchema, + const std::vector& key) { TTestDbWrapper db; TIndexInfo tableInfo = NColumnShard::BuildTableInfo(ydbSchema, key); @@ -404,9 +418,8 @@ Y_UNIT_TEST_SUITE(TColumnEngineTestLogs) { // PlanStep, TxId, PathId, DedupId, BlobId, Data, [Metadata] // load - TColumnEngineForLogs engine(0, TestLimits(), CommonStoragesManager); TSnapshot indexSnaphot(1, 1); - engine.RegisterSchemaVersion(indexSnaphot, TIndexInfo(tableInfo)); + TColumnEngineForLogs engine(0, CommonStoragesManager, indexSnaphot, TIndexInfo(tableInfo)); for (auto&& i : paths) { engine.RegisterTable(i); } @@ -420,20 +433,24 @@ Y_UNIT_TEST_SUITE(TColumnEngineTestLogs) { // write ui32 step = 1000; - THashMap blobs; - blobs[blobRanges[0]] = testBlob; - blobs[blobRanges[1]] = testBlob; - Insert(engine, db, TSnapshot(1, 2), std::move(dataToIndex), blobs, step); + { + NBlobOperations::NRead::TCompositeReadBlobs blobs; + TString str1 = testBlob; + blobs.Add(IStoragesManager::DefaultStorageId, blobRanges[0], std::move(str1)); + str1 = testBlob; + blobs.Add(IStoragesManager::DefaultStorageId, blobRanges[1], std::move(str1)); + Insert(engine, db, TSnapshot(1, 2), std::move(dataToIndex), blobs, step); + } // selects auto lastSchema = engine.GetVersionedIndex().GetLastSchema(); UNIT_ASSERT_EQUAL(lastSchema->GetSnapshot(), indexSnaphot); const TIndexInfo& indexInfo = lastSchema->GetIndexInfo(); - THashSet oneColumnId = { indexInfo.GetColumnId(testColumns[0].first) }; + THashSet oneColumnId = { indexInfo.GetColumnId(testColumns[0].GetName()) }; THashSet columnIds; - for (auto& [column, typeId] : testColumns) { - 
columnIds.insert(indexInfo.GetColumnId(column)); + for (auto& c : testColumns) { + columnIds.insert(indexInfo.GetColumnId(c.GetName())); } { // select from snap before insert @@ -471,18 +488,18 @@ Y_UNIT_TEST_SUITE(TColumnEngineTestLogs) { } Y_UNIT_TEST(IndexWriteLoadReadStrPK) { - std::vector> key = { - {"resource_type", TTypeInfo(NTypeIds::Utf8) }, - {"resource_id", TTypeInfo(NTypeIds::Utf8) }, - {"uid", TTypeInfo(NTypeIds::Utf8) }, - {"timestamp", TTypeInfo(NTypeIds::Timestamp) } + std::vector key = { + NArrow::NTest::TTestColumn("resource_type", TTypeInfo(NTypeIds::Utf8) ), + NArrow::NTest::TTestColumn("resource_id", TTypeInfo(NTypeIds::Utf8) ), + NArrow::NTest::TTestColumn("uid", TTypeInfo(NTypeIds::Utf8) ), + NArrow::NTest::TTestColumn("timestamp", TTypeInfo(NTypeIds::Timestamp) ) }; WriteLoadRead(testColumns, key); } - void ReadWithPredicates(const std::vector>& ydbSchema, - const std::vector>& key) { + void ReadWithPredicates(const std::vector& ydbSchema, + const std::vector& key) { TTestDbWrapper db; TIndexInfo tableInfo = NColumnShard::BuildTableInfo(ydbSchema, key); @@ -490,8 +507,7 @@ Y_UNIT_TEST_SUITE(TColumnEngineTestLogs) { ui32 step = 1000; TSnapshot indexSnapshot(1, 1); - TColumnEngineForLogs engine(0, TestLimits(), CommonStoragesManager); - engine.RegisterSchemaVersion(indexSnapshot, TIndexInfo(tableInfo)); + TColumnEngineForLogs engine(0, CommonStoragesManager, indexSnapshot, TIndexInfo(tableInfo)); engine.RegisterTable(pathId); engine.Load(db); @@ -503,8 +519,9 @@ Y_UNIT_TEST_SUITE(TColumnEngineTestLogs) { for (ui64 txId = 1; txId <= 20; ++txId, rowPos += numRows) { TString testBlob = MakeTestBlob(rowPos, rowPos + numRows); auto blobRange = MakeBlobRange(++step, testBlob.size()); - THashMap blobs; - blobs[blobRange] = testBlob; + NBlobOperations::NRead::TCompositeReadBlobs blobs; + TString str1 = testBlob; + blobs.Add(IStoragesManager::DefaultStorageId, blobRange, std::move(str1)); // PlanStep, TxId, PathId, DedupId, BlobId, Data, [Metadata] 
std::vector dataToIndex; @@ -528,7 +545,7 @@ Y_UNIT_TEST_SUITE(TColumnEngineTestLogs) { planStep = 3; const TIndexInfo& indexInfo = engine.GetVersionedIndex().GetLastSchema()->GetIndexInfo(); - THashSet oneColumnId = { indexInfo.GetColumnId(key[0].first) }; + THashSet oneColumnId = { indexInfo.GetColumnId(key[0].GetName()) }; { // full scan ui64 txId = 1; @@ -541,7 +558,7 @@ Y_UNIT_TEST_SUITE(TColumnEngineTestLogs) { { ui64 txId = 1; std::shared_ptr gt10k = MakePredicate(10000, NArrow::EOperation::Greater); - if (key[0].second == TTypeInfo(NTypeIds::Utf8)) { + if (key[0].GetType() == TTypeInfo(NTypeIds::Utf8)) { gt10k = MakeStrPredicate("10000", NArrow::EOperation::Greater); } NOlap::TPKRangesFilter pkFilter(false); @@ -553,7 +570,7 @@ Y_UNIT_TEST_SUITE(TColumnEngineTestLogs) { { ui64 txId = 1; std::shared_ptr lt10k = MakePredicate(8999, NArrow::EOperation::Less); // TODO: better border checks - if (key[0].second == TTypeInfo(NTypeIds::Utf8)) { + if (key[0].GetType() == TTypeInfo(NTypeIds::Utf8)) { lt10k = MakeStrPredicate("08999", NArrow::EOperation::Less); } NOlap::TPKRangesFilter pkFilter(false); @@ -568,11 +585,11 @@ Y_UNIT_TEST_SUITE(TColumnEngineTestLogs) { } Y_UNIT_TEST(IndexReadWithPredicatesStrPK) { - std::vector> key = { - {"resource_type", TTypeInfo(NTypeIds::Utf8) }, - {"resource_id", TTypeInfo(NTypeIds::Utf8) }, - {"uid", TTypeInfo(NTypeIds::Utf8) }, - {"timestamp", TTypeInfo(NTypeIds::Timestamp) } + std::vector key = { + NArrow::NTest::TTestColumn("resource_type", TTypeInfo(NTypeIds::Utf8) ), + NArrow::NTest::TTestColumn("resource_id", TTypeInfo(NTypeIds::Utf8) ), + NArrow::NTest::TTestColumn("uid", TTypeInfo(NTypeIds::Utf8) ), + NArrow::NTest::TTestColumn("timestamp", TTypeInfo(NTypeIds::Timestamp) ) }; ReadWithPredicates(testColumns, key); @@ -589,20 +606,19 @@ Y_UNIT_TEST_SUITE(TColumnEngineTestLogs) { // inserts ui64 planStep = 1; - TColumnEngineForLogs engine(0, TestLimits(), CommonStoragesManager); TSnapshot indexSnapshot(1, 1); - 
engine.RegisterSchemaVersion(indexSnapshot, TIndexInfo(tableInfo)); + TColumnEngineForLogs engine(0, CommonStoragesManager, indexSnapshot, TIndexInfo(tableInfo)); engine.RegisterTable(pathId); engine.Load(db); ui64 numRows = 1000; ui64 rowPos = 0; - THashMap blobsAll; + NBlobOperations::NRead::TCompositeReadBlobs blobsAll; for (ui64 txId = 1; txId <= 100; ++txId, rowPos += numRows) { TString testBlob = MakeTestBlob(rowPos, rowPos + numRows); auto blobRange = MakeBlobRange(++step, testBlob.size()); - THashMap blobs; - blobs[blobRange] = testBlob; + NBlobOperations::NRead::TCompositeReadBlobs blobs; + blobs.Add(IStoragesManager::DefaultStorageId, blobRange, std::move(testBlob)); // PlanStep, TxId, PathId, DedupId, BlobId, Data, [Metadata] std::vector dataToIndex; @@ -610,15 +626,12 @@ Y_UNIT_TEST_SUITE(TColumnEngineTestLogs) { TInsertedData(txId, pathId, "", blobRange.BlobId, TLocalHelper::GetMetaProto(), 0, {})); bool ok = Insert(engine, db, TSnapshot(planStep, txId), std::move(dataToIndex), blobs, step); - for (auto&& i : blobs) { - blobsAll[i.first] = i.second; - } + blobsAll.Merge(std::move(blobs)); UNIT_ASSERT(ok); } { // check it's overloaded after reload - TColumnEngineForLogs tmpEngine(0, TestLimits(), CommonStoragesManager); - tmpEngine.RegisterSchemaVersion(TSnapshot::Zero(), TIndexInfo(tableInfo)); + TColumnEngineForLogs tmpEngine(0, CommonStoragesManager, TSnapshot::Zero(), TIndexInfo(tableInfo)); tmpEngine.RegisterTable(pathId); tmpEngine.Load(db); } @@ -635,8 +648,8 @@ Y_UNIT_TEST_SUITE(TColumnEngineTestLogs) { for (ui64 txId = 1; txId <= 2; ++txId, rowPos += numRows) { TString testBlob = MakeTestBlob(rowPos, rowPos + numRows); auto blobRange = MakeBlobRange(++step, testBlob.size()); - THashMap blobs; - blobs[blobRange] = testBlob; + NBlobOperations::NRead::TCompositeReadBlobs blobs; + blobs.Add(IStoragesManager::DefaultStorageId, blobRange, std::move(testBlob)); // PlanStep, TxId, PathId, DedupId, BlobId, Data, [Metadata] std::vector dataToIndex; @@ 
-648,8 +661,7 @@ Y_UNIT_TEST_SUITE(TColumnEngineTestLogs) { } { // check it's not overloaded after reload - TColumnEngineForLogs tmpEngine(0, TestLimits(), CommonStoragesManager); - tmpEngine.RegisterSchemaVersion(TSnapshot::Zero(), TIndexInfo(tableInfo)); + TColumnEngineForLogs tmpEngine(0, CommonStoragesManager, TSnapshot::Zero(), TIndexInfo(tableInfo)); tmpEngine.RegisterTable(pathId); tmpEngine.Load(db); } @@ -666,8 +678,7 @@ Y_UNIT_TEST_SUITE(TColumnEngineTestLogs) { ui64 planStep = 1; TSnapshot indexSnapshot(1, 1); { - TColumnEngineForLogs engine(0, TestLimits(), CommonStoragesManager); - engine.RegisterSchemaVersion(indexSnapshot, TIndexInfo(tableInfo)); + TColumnEngineForLogs engine(0, CommonStoragesManager, indexSnapshot, TIndexInfo(tableInfo)); engine.RegisterTable(pathId); engine.Load(db); @@ -676,8 +687,9 @@ Y_UNIT_TEST_SUITE(TColumnEngineTestLogs) { for (ui64 txId = 1; txId <= 20; ++txId, rowPos += numRows) { TString testBlob = MakeTestBlob(rowPos, rowPos + numRows); auto blobRange = MakeBlobRange(++step, testBlob.size()); - THashMap blobs; - blobs[blobRange] = testBlob; + NBlobOperations::NRead::TCompositeReadBlobs blobs; + TString str1 = testBlob; + blobs.Add(IStoragesManager::DefaultStorageId, blobRange, std::move(str1)); // PlanStep, TxId, PathId, DedupId, BlobId, Data, [Metadata] std::vector dataToIndex; @@ -698,7 +710,7 @@ Y_UNIT_TEST_SUITE(TColumnEngineTestLogs) { planStep = 3; const TIndexInfo& indexInfo = engine.GetVersionedIndex().GetLastSchema()->GetIndexInfo(); - THashSet oneColumnId = {indexInfo.GetColumnId(testColumns[0].first)}; + THashSet oneColumnId = {indexInfo.GetColumnId(testColumns[0].GetName())}; { // full scan ui64 txId = 1; @@ -719,7 +731,7 @@ Y_UNIT_TEST_SUITE(TColumnEngineTestLogs) { std::shared_ptr ttlColType = arrow::timestamp(arrow::TimeUnit::MICRO); THashMap pathTtls; NOlap::TTiering tiering; - tiering.Ttl = NOlap::TTierInfo::MakeTtl(TDuration::MicroSeconds(TInstant::Now().MicroSeconds() - 10000), "timestamp"); + 
AFL_VERIFY(tiering.Add(NOlap::TTierInfo::MakeTtl(TDuration::MicroSeconds(TInstant::Now().MicroSeconds() - 10000), "timestamp"))); pathTtls.emplace(pathId, std::move(tiering)); Ttl(engine, db, pathTtls, 10); @@ -733,13 +745,12 @@ Y_UNIT_TEST_SUITE(TColumnEngineTestLogs) { } { // load - TColumnEngineForLogs engine(0, TestLimits(), CommonStoragesManager); - engine.RegisterSchemaVersion(indexSnapshot, TIndexInfo(tableInfo)); + TColumnEngineForLogs engine(0, CommonStoragesManager, indexSnapshot, TIndexInfo(tableInfo)); engine.RegisterTable(pathId); engine.Load(db); const TIndexInfo& indexInfo = engine.GetVersionedIndex().GetLastSchema()->GetIndexInfo(); - THashSet oneColumnId = {indexInfo.GetColumnId(testColumns[0].first)}; + THashSet oneColumnId = {indexInfo.GetColumnId(testColumns[0].GetName())}; { // full scan ui64 txId = 1; diff --git a/ydb/core/tx/columnshard/engines/ut/ut_program.cpp b/ydb/core/tx/columnshard/engines/ut/ut_program.cpp index 4cfe3282267a..a4586c489460 100644 --- a/ydb/core/tx/columnshard/engines/ut/ut_program.cpp +++ b/ydb/core/tx/columnshard/engines/ut/ut_program.cpp @@ -1,7 +1,8 @@ #include +#include -#include #include +#include #include #include @@ -20,16 +21,16 @@ using TTypeId = NScheme::TTypeId; using TTypeInfo = NScheme::TTypeInfo; namespace { - static const std::vector> testColumns = { - {"timestamp", TTypeInfo(NTypeIds::Timestamp) }, - {"uid", TTypeInfo(NTypeIds::Utf8) }, - {"sum", TTypeInfo(NTypeIds::Int32) }, - {"vat", TTypeInfo(NTypeIds::Int32) }, + static const std::vector testColumns = { + NArrow::NTest::TTestColumn("timestamp", TTypeInfo(NTypeIds::Timestamp) ), + NArrow::NTest::TTestColumn("uid", TTypeInfo(NTypeIds::Utf8) ), + NArrow::NTest::TTestColumn("sum", TTypeInfo(NTypeIds::Int32)), + NArrow::NTest::TTestColumn("vat", TTypeInfo(NTypeIds::Int32)), }; - static const std::vector> testKey = { - {"timestamp", TTypeInfo(NTypeIds::Timestamp) }, - {"uid", TTypeInfo(NTypeIds::Utf8) } + static const std::vector testKey = { + 
NArrow::NTest::TTestColumn("timestamp", TTypeInfo(NTypeIds::Timestamp) ), + NArrow::NTest::TTestColumn("uid", TTypeInfo(NTypeIds::Utf8) ) }; } @@ -133,7 +134,7 @@ Y_UNIT_TEST_SUITE(TestProgram) { Y_UNIT_TEST(YqlKernel) { TIndexInfo indexInfo = BuildTableInfo(testColumns, testKey); - TIndexColumnResolver columnResolver(indexInfo); + NReader::NPlain::TIndexColumnResolver columnResolver(indexInfo); NKikimrSSA::TProgram programProto; { @@ -178,7 +179,7 @@ Y_UNIT_TEST_SUITE(TestProgram) { Y_UNIT_TEST(YqlKernelStartsWithScalar) { TIndexInfo indexInfo = BuildTableInfo(testColumns, testKey); - TIndexColumnResolver columnResolver(indexInfo); + NReader::NPlain::TIndexColumnResolver columnResolver(indexInfo); NKikimrSSA::TProgram programProto; { @@ -232,7 +233,7 @@ Y_UNIT_TEST_SUITE(TestProgram) { Y_UNIT_TEST(YqlKernelEndsWithScalar) { TIndexInfo indexInfo = BuildTableInfo(testColumns, testKey); - TIndexColumnResolver columnResolver(indexInfo); + NReader::NPlain::TIndexColumnResolver columnResolver(indexInfo); NKikimrSSA::TProgram programProto; { @@ -286,7 +287,7 @@ Y_UNIT_TEST_SUITE(TestProgram) { Y_UNIT_TEST(YqlKernelStartsWith) { TIndexInfo indexInfo = BuildTableInfo(testColumns, testKey); - TIndexColumnResolver columnResolver(indexInfo); + NReader::NPlain::TIndexColumnResolver columnResolver(indexInfo); NKikimrSSA::TProgram programProto; { @@ -333,7 +334,7 @@ Y_UNIT_TEST_SUITE(TestProgram) { Y_UNIT_TEST(YqlKernelEndsWith) { TIndexInfo indexInfo = BuildTableInfo(testColumns, testKey); - TIndexColumnResolver columnResolver(indexInfo); + NReader::NPlain::TIndexColumnResolver columnResolver(indexInfo); NKikimrSSA::TProgram programProto; @@ -381,7 +382,7 @@ Y_UNIT_TEST_SUITE(TestProgram) { Y_UNIT_TEST(YqlKernelContains) { TIndexInfo indexInfo = BuildTableInfo(testColumns, testKey); - TIndexColumnResolver columnResolver(indexInfo); + NReader::NPlain::TIndexColumnResolver columnResolver(indexInfo); NKikimrSSA::TProgram programProto; @@ -434,7 +435,7 @@ 
Y_UNIT_TEST_SUITE(TestProgram) { Y_UNIT_TEST(YqlKernelEquals) { TIndexInfo indexInfo = BuildTableInfo(testColumns, testKey); - TIndexColumnResolver columnResolver(indexInfo); + NReader::NPlain::TIndexColumnResolver columnResolver(indexInfo); NKikimrSSA::TProgram programProto; @@ -489,7 +490,7 @@ Y_UNIT_TEST_SUITE(TestProgram) { void JsonExistsImpl(bool isBinaryType) { TIndexInfo indexInfo = BuildTableInfo(testColumns, testKey); - TIndexColumnResolver columnResolver(indexInfo); + NReader::NPlain::TIndexColumnResolver columnResolver(indexInfo); NKikimrSSA::TProgram programProto; { @@ -535,7 +536,9 @@ Y_UNIT_TEST_SUITE(TestProgram) { if (isBinaryType) { THashMap cc; cc["json_data"] = TTypeInfo(NTypeIds::JsonDocument); - batch = NArrow::ConvertColumns(batch, cc); + auto convertResult = NArrow::ConvertColumns(batch, cc); + UNIT_ASSERT_C(convertResult.ok(), convertResult.status().ToString()); + batch = *convertResult; Cerr << batch->ToString() << Endl; } auto res = program.ApplyProgram(batch); @@ -551,7 +554,7 @@ Y_UNIT_TEST_SUITE(TestProgram) { Y_UNIT_TEST(Like) { TIndexInfo indexInfo = BuildTableInfo(testColumns, testKey); - TIndexColumnResolver columnResolver(indexInfo); + NReader::NPlain::TIndexColumnResolver columnResolver(indexInfo); NKikimrSSA::TProgram programProto; { @@ -657,7 +660,7 @@ Y_UNIT_TEST_SUITE(TestProgram) { void JsonValueImpl(bool isBinaryType, NYql::EDataSlot resultType) { TIndexInfo indexInfo = BuildTableInfo(testColumns, testKey); - TIndexColumnResolver columnResolver(indexInfo); + NReader::NPlain::TIndexColumnResolver columnResolver(indexInfo); NKikimrSSA::TProgram programProto; { @@ -727,7 +730,9 @@ Y_UNIT_TEST_SUITE(TestProgram) { if (isBinaryType) { THashMap cc; cc["json_data"] = TTypeInfo(NTypeIds::JsonDocument); - batch = NArrow::ConvertColumns(batch, cc); + auto convertResult = NArrow::ConvertColumns(batch, cc); + UNIT_ASSERT_C(convertResult.ok(), convertResult.status().ToString()); + batch = *convertResult; Cerr << batch->ToString() << 
Endl; } @@ -808,7 +813,7 @@ Y_UNIT_TEST_SUITE(TestProgram) { Y_UNIT_TEST(SimpleFunction) { TIndexInfo indexInfo = BuildTableInfo(testColumns, testKey);; - TIndexColumnResolver columnResolver(indexInfo); + NReader::NPlain::TIndexColumnResolver columnResolver(indexInfo); NKikimrSSA::TProgram programProto; { diff --git a/ydb/core/tx/columnshard/engines/ut/ya.make b/ydb/core/tx/columnshard/engines/ut/ya.make index fc8159e31764..41a7d7b2aac3 100644 --- a/ydb/core/tx/columnshard/engines/ut/ya.make +++ b/ydb/core/tx/columnshard/engines/ut/ya.make @@ -23,6 +23,7 @@ PEERDIR( ydb/library/yql/sql/pg_dummy ydb/library/yql/core/arrow_kernels/request ydb/core/testlib/default + ydb/core/tx/columnshard/test_helper ydb/core/tx/columnshard/hooks/abstract ydb/core/tx/columnshard/hooks/testing diff --git a/ydb/core/tx/columnshard/engines/writer/blob_constructor.cpp b/ydb/core/tx/columnshard/engines/writer/blob_constructor.cpp index 2fae90b9e7f1..ccf07b5ba5f3 100644 --- a/ydb/core/tx/columnshard/engines/writer/blob_constructor.cpp +++ b/ydb/core/tx/columnshard/engines/writer/blob_constructor.cpp @@ -2,4 +2,16 @@ namespace NKikimr::NOlap { +TBlobWriteInfo::TBlobWriteInfo(const TString& data, const std::shared_ptr& writeOperator, const std::optional& customBlobId) + : Data(data) + , WriteOperator(writeOperator) +{ + Y_ABORT_UNLESS(WriteOperator); + BlobId = WriteOperator->AddDataForWrite(data, customBlobId); +} + +NKikimr::NOlap::TBlobWriteInfo TBlobWriteInfo::BuildWriteTask(const TString& data, const std::shared_ptr& writeOperator, const std::optional& customBlobId /*= {}*/) { + return TBlobWriteInfo(data, writeOperator, customBlobId); +} + } diff --git a/ydb/core/tx/columnshard/engines/writer/blob_constructor.h b/ydb/core/tx/columnshard/engines/writer/blob_constructor.h index c2df91c9a605..2c7bf22753af 100644 --- a/ydb/core/tx/columnshard/engines/writer/blob_constructor.h +++ b/ydb/core/tx/columnshard/engines/writer/blob_constructor.h @@ -23,18 +23,11 @@ class TBlobWriteInfo { private: 
YDB_READONLY_DEF(TUnifiedBlobId, BlobId); YDB_READONLY_DEF(TString, Data); - YDB_READONLY_DEF(std::shared_ptr, WriteOperator); - - TBlobWriteInfo(const TString& data, const std::shared_ptr& writeOperator) - : Data(data) - , WriteOperator(writeOperator) { - Y_ABORT_UNLESS(WriteOperator); - BlobId = WriteOperator->AddDataForWrite(data); - } + YDB_ACCESSOR_DEF(std::shared_ptr, WriteOperator); + + TBlobWriteInfo(const TString& data, const std::shared_ptr& writeOperator, const std::optional& customBlobId); public: - static TBlobWriteInfo BuildWriteTask(const TString& data, const std::shared_ptr& writeOperator) { - return TBlobWriteInfo(data, writeOperator); - } + static TBlobWriteInfo BuildWriteTask(const TString& data, const std::shared_ptr& writeOperator, const std::optional& customBlobId = {}); }; } diff --git a/ydb/core/tx/columnshard/engines/writer/buffer/actor.cpp b/ydb/core/tx/columnshard/engines/writer/buffer/actor.cpp index 28e0ab6ca203..0ffaaf3a9fee 100644 --- a/ydb/core/tx/columnshard/engines/writer/buffer/actor.cpp +++ b/ydb/core/tx/columnshard/engines/writer/buffer/actor.cpp @@ -1,4 +1,5 @@ #include "actor.h" +#include #include namespace NKikimr::NColumnShard::NWriting { diff --git a/ydb/core/tx/columnshard/engines/writer/buffer/ya.make b/ydb/core/tx/columnshard/engines/writer/buffer/ya.make index 78a3ee199907..2aafd30fcc66 100644 --- a/ydb/core/tx/columnshard/engines/writer/buffer/ya.make +++ b/ydb/core/tx/columnshard/engines/writer/buffer/ya.make @@ -15,6 +15,8 @@ PEERDIR( ydb/core/tablet_flat ydb/library/yql/core/expr_nodes ydb/library/actors/testlib/common + ydb/core/tx/columnshard/data_sharing/protos + ydb/core/tx/columnshard/blobs_action/protos ) END() diff --git a/ydb/core/tx/columnshard/engines/writer/compacted_blob_constructor.cpp b/ydb/core/tx/columnshard/engines/writer/compacted_blob_constructor.cpp index 59cd3a62042e..d3b7d27e95eb 100644 --- a/ydb/core/tx/columnshard/engines/writer/compacted_blob_constructor.cpp +++ 
b/ydb/core/tx/columnshard/engines/writer/compacted_blob_constructor.cpp @@ -3,6 +3,7 @@ #include #include #include +#include namespace NKikimr::NOlap { @@ -18,22 +19,31 @@ TCompactedWriteController::TCompactedWriteController(const TActorId& dstActor, T auto* pInfo = changes.GetWritePortionInfo(i); Y_ABORT_UNLESS(pInfo); TPortionInfoWithBlobs& portionWithBlobs = *pInfo; - auto action = changes.MutableBlobsAction().GetWriting(portionWithBlobs.GetPortionInfo()); for (auto&& b : portionWithBlobs.GetBlobs()) { - auto& task = AddWriteTask(TBlobWriteInfo::BuildWriteTask(b.GetBlob(), action)); + auto& task = AddWriteTask(TBlobWriteInfo::BuildWriteTask(b.GetBlob(), changes.MutableBlobsAction().GetWriting(b.GetOperator()->GetStorageId()))); b.RegisterBlobId(portionWithBlobs, task.GetBlobId()); } } } void TCompactedWriteController::DoOnReadyResult(const NActors::TActorContext& ctx, const NColumnShard::TBlobPutResult::TPtr& putResult) { - WriteIndexEv->PutResult = putResult; + WriteIndexEv->PutResult = NYDBTest::TControllers::GetColumnShardController()->OverrideBlobPutResultOnCompaction(putResult, GetBlobActions()); ctx.Send(DstActor, WriteIndexEv.Release()); } TCompactedWriteController::~TCompactedWriteController() { if (WriteIndexEv && WriteIndexEv->IndexChanges) { - WriteIndexEv->IndexChanges->AbortEmergency(); + WriteIndexEv->IndexChanges->AbortEmergency("TCompactedWriteController destructed with WriteIndexEv and WriteIndexEv->IndexChanges"); + } +} + +const NKikimr::NOlap::TBlobsAction& TCompactedWriteController::GetBlobsAction() { + return WriteIndexEv->IndexChanges->GetBlobsAction(); +} + +void TCompactedWriteController::DoAbort(const TString& reason) { + if (WriteIndexEv && WriteIndexEv->IndexChanges) { + WriteIndexEv->IndexChanges->AbortEmergency("TCompactedWriteController aborted: " + reason); } } diff --git a/ydb/core/tx/columnshard/engines/writer/compacted_blob_constructor.h b/ydb/core/tx/columnshard/engines/writer/compacted_blob_constructor.h index 
36a5d9a34b45..eb8ef262bb97 100644 --- a/ydb/core/tx/columnshard/engines/writer/compacted_blob_constructor.h +++ b/ydb/core/tx/columnshard/engines/writer/compacted_blob_constructor.h @@ -6,6 +6,7 @@ #include #include #include +#include namespace NKikimr::NOlap { @@ -15,7 +16,10 @@ class TCompactedWriteController : public NColumnShard::IWriteController { TActorId DstActor; protected: void DoOnReadyResult(const NActors::TActorContext& ctx, const NColumnShard::TBlobPutResult::TPtr& putResult) override; + virtual void DoAbort(const TString& reason) override; public: + const TBlobsAction& GetBlobsAction(); + TCompactedWriteController(const TActorId& dstActor, TAutoPtr writeEv); ~TCompactedWriteController(); }; diff --git a/ydb/core/tx/columnshard/engines/writer/indexed_blob_constructor.h b/ydb/core/tx/columnshard/engines/writer/indexed_blob_constructor.h index 392d76fd10c6..e4af7b2da294 100644 --- a/ydb/core/tx/columnshard/engines/writer/indexed_blob_constructor.h +++ b/ydb/core/tx/columnshard/engines/writer/indexed_blob_constructor.h @@ -5,6 +5,7 @@ #include #include +#include #include #include #include @@ -57,7 +58,7 @@ class TWritingBlob { bool AddData(TWideSerializedBatch& batch) { if (BlobData.size() + batch.GetSplittedBlobs().GetSize() < 8 * 1024 * 1024) { Ranges.emplace_back(&batch); - batch.SetRange(TBlobRange(TUnifiedBlobId(0, 0, 0, 0, BlobData.size() + batch.GetSplittedBlobs().GetSize()), BlobData.size(), batch.GetSplittedBlobs().GetSize())); + batch.SetRange(TBlobRange(TUnifiedBlobId(0, 0, 0, 0, 0, 0, BlobData.size() + batch.GetSplittedBlobs().GetSize()), BlobData.size(), batch.GetSplittedBlobs().GetSize())); BlobData += batch.GetSplittedBlobs().GetData(); return true; } else { @@ -134,9 +135,9 @@ class TWritingBuffer: public TMoveOnly { for (auto&& s : Aggregations[i]->GetSplittedBlobs()) { if (--linksCount[s.GetRange().BlobId] == 0) { if (!DeclareRemoveAction) { - DeclareRemoveAction = bOperator->StartDeclareRemovingAction("WRITING_BUFFER"); + 
DeclareRemoveAction = bOperator->StartDeclareRemovingAction(NBlobOperations::EConsumer::WRITING_BUFFER); } - DeclareRemoveAction->DeclareRemove(s.GetRange().BlobId); + DeclareRemoveAction->DeclareRemove(bOperator->GetSelfTabletId(), s.GetRange().BlobId); } } Aggregations.erase(Aggregations.begin() + i); diff --git a/ydb/core/tx/columnshard/engines/writer/write_controller.cpp b/ydb/core/tx/columnshard/engines/writer/write_controller.cpp index abae873b3c5b..0b3322b955c4 100644 --- a/ydb/core/tx/columnshard/engines/writer/write_controller.cpp +++ b/ydb/core/tx/columnshard/engines/writer/write_controller.cpp @@ -1,5 +1,25 @@ #include "write_controller.h" +#include namespace NKikimr::NColumnShard { +void IWriteController::OnBlobWriteResult(const TEvBlobStorage::TEvPutResult& result) { + NOlap::TUnifiedBlobId blobId(result.GroupId, result.Id); + auto it = WaitingActions.find(result.StorageId ? result.StorageId : NOlap::IStoragesManager::DefaultStorageId); + AFL_VERIFY(it != WaitingActions.end()); + it->second->OnBlobWriteResult(blobId, result.Status); + if (it->second->IsReady()) { + WaitingActions.erase(it); + } + DoOnBlobWriteResult(result); +} + +NKikimr::NOlap::TBlobWriteInfo& IWriteController::AddWriteTask(NOlap::TBlobWriteInfo&& task) { + auto fullAction = WritingActions.Add(task.GetWriteOperator()); + task.SetWriteOperator(fullAction); + WaitingActions.emplace(fullAction->GetStorageId(), fullAction); + WriteTasks.emplace_back(std::move(task)); + return WriteTasks.back(); +} + } diff --git a/ydb/core/tx/columnshard/engines/writer/write_controller.h b/ydb/core/tx/columnshard/engines/writer/write_controller.h index f6d030528b27..ae51e5571cec 100644 --- a/ydb/core/tx/columnshard/engines/writer/write_controller.h +++ b/ydb/core/tx/columnshard/engines/writer/write_controller.h @@ -4,7 +4,6 @@ #include "blob_constructor.h" #include -#include #include #include @@ -29,8 +28,8 @@ class TBlobPutResult: public NColumnShard::TPutStatus { class IWriteController { private: - 
THashMap> BlobActions; - THashMap> WritingActions; + THashMap> WaitingActions; + NOlap::TWriteActionsCollection WritingActions; std::deque WriteTasks; protected: virtual void DoOnReadyResult(const NActors::TActorContext& ctx, const TBlobPutResult::TPtr& putResult) = 0; @@ -41,12 +40,14 @@ class IWriteController { } - NOlap::TBlobWriteInfo& AddWriteTask(NOlap::TBlobWriteInfo&& task) { - WritingActions.emplace(task.GetWriteOperator()->GetActionId(), task.GetWriteOperator()); - WriteTasks.emplace_back(std::move(task)); - return WriteTasks.back(); + NOlap::TBlobWriteInfo& AddWriteTask(NOlap::TBlobWriteInfo&& task); + virtual void DoAbort(const TString& /*reason*/) { } public: + const NOlap::TWriteActionsCollection& GetBlobActions() const { + return WritingActions; + } + TString DebugString() const { TStringBuilder sb; for (auto&& i : WritingActions) { @@ -60,10 +61,12 @@ class IWriteController { return TStringBuilder() << "size=" << size << ";count=" << WriteTasks.size() << ";actions=" << sb << ";"; } - void Abort() { + void Abort(const TString& reason) { + AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("event", "IWriteController aborted")("reason", reason); for (auto&& i : WritingActions) { i.second->Abort(); } + DoAbort(reason); } using TPtr = std::shared_ptr; @@ -77,14 +80,7 @@ class IWriteController { DoOnReadyResult(ctx, putResult); } - void OnBlobWriteResult(const TEvBlobStorage::TEvPutResult& result) { - TUnifiedBlobId blobId(result.GroupId, result.Id); - auto it = BlobActions.find(blobId); - AFL_VERIFY(it != BlobActions.end()); - it->second->OnBlobWriteResult(blobId, result.Status); - BlobActions.erase(it); - DoOnBlobWriteResult(result); - } + void OnBlobWriteResult(const TEvBlobStorage::TEvPutResult& result); std::optional Next() { if (WriteTasks.empty()) { @@ -92,19 +88,11 @@ class IWriteController { } auto result = std::move(WriteTasks.front()); WriteTasks.pop_front(); - BlobActions.emplace(result.GetBlobId(), result.GetWriteOperator()); return result; } - bool 
IsBlobActionsReady() const { - return BlobActions.empty(); - } - std::vector> GetBlobActions() const { - std::vector> actions; - for (auto&& i : WritingActions) { - actions.emplace_back(i.second); - } - return actions; + bool IsReady() const { + return WaitingActions.empty(); } }; diff --git a/ydb/core/tx/columnshard/engines/ya.make b/ydb/core/tx/columnshard/engines/ya.make index 8e80cb730b69..4a244f8be456 100644 --- a/ydb/core/tx/columnshard/engines/ya.make +++ b/ydb/core/tx/columnshard/engines/ya.make @@ -30,6 +30,7 @@ PEERDIR( ydb/core/tx/columnshard/engines/changes ydb/core/tx/columnshard/engines/portions ydb/core/tx/program + ydb/core/tx/columnshard/common # for NYql::NUdf alloc stuff used in binary_json ydb/library/yql/public/udf/service/exception_policy diff --git a/ydb/core/tx/columnshard/export/actor/export_actor.cpp b/ydb/core/tx/columnshard/export/actor/export_actor.cpp new file mode 100644 index 000000000000..3790bc94b790 --- /dev/null +++ b/ydb/core/tx/columnshard/export/actor/export_actor.cpp @@ -0,0 +1,36 @@ +#include "export_actor.h" +#include + +namespace NKikimr::NOlap::NExport { + +void TActor::HandleExecute(NKqp::TEvKqpCompute::TEvScanData::TPtr& ev) { + SwitchStage(EStage::WaitData, EStage::WaitWriting); + auto data = ev->Get()->ArrowBatch; + AFL_VERIFY(!!data || ev->Get()->Finished); + if (data) { + CurrentData = NArrow::ToBatch(data, true); + CurrentDataBlob = Serializer->SerializeFull(CurrentData); + if (data) { + auto controller = std::make_shared(SelfId(), std::vector({CurrentDataBlob}), + BlobsOperator->StartWritingAction(NBlobOperations::EConsumer::EXPORT), Cursor, ShardTabletId, Selector->GetPathId()); + Register(CreateWriteActor((ui64)ShardTabletId, controller, TInstant::Max())); + } + } else { + CurrentData = nullptr; + CurrentDataBlob = ""; + TBase::Send(SelfId(), new NEvents::TEvExportWritingFinished); + } + TOwnedCellVec lastKey = ev->Get()->LastKey; + AFL_VERIFY(!Cursor.IsFinished()); + Cursor.InitNext(ev->Get()->LastKey, 
ev->Get()->Finished); +} + +void TActor::HandleExecute(NEvents::TEvExportWritingFailed::TPtr& /*ev*/) { + SwitchStage(EStage::WaitWriting, EStage::WaitWriting); + auto controller = std::make_shared(SelfId(), std::vector({CurrentDataBlob}), + BlobsOperator->StartWritingAction(NBlobOperations::EConsumer::EXPORT), + Cursor, ShardTabletId, Selector->GetPathId()); + Register(CreateWriteActor((ui64)ShardTabletId, controller, TInstant::Max())); +} + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/export/actor/export_actor.h b/ydb/core/tx/columnshard/export/actor/export_actor.h new file mode 100644 index 000000000000..b82f97106407 --- /dev/null +++ b/ydb/core/tx/columnshard/export/actor/export_actor.h @@ -0,0 +1,118 @@ +#pragma once +#include "write.h" + +#include +#include +#include +#include +#include +#include + +#include + +namespace NKikimr::NOlap::NExport { + +class TActor: public NActors::TActorBootstrapped { +private: + enum class EStage { + Initialization, + WaitData, + WaitWriting, + WaitSaveCursor, + Finished + }; + + using TBase = NActors::TActorBootstrapped; + const TIdentifier ExportId; + const TActorId ShardActorId; + const TTabletId ShardTabletId; + NArrow::NSerialization::TSerializerContainer Serializer; + TSelectorContainer Selector; + std::shared_ptr BlobsOperator; + std::optional ScanActorId; + + TCursor Cursor; + EStage Stage = EStage::Initialization; + std::shared_ptr CurrentData; + TString CurrentDataBlob; + static inline const ui64 FreeSpace = ((ui64)8) << 20; + void SwitchStage(const EStage from, const EStage to) { + AFL_VERIFY(Stage == from)("from", (ui32)from)("real", (ui32)Stage)("to", (ui32)to); + Stage = to; + } + +protected: + void HandleExecute(NKqp::TEvKqpCompute::TEvScanInitActor::TPtr& ev) { + SwitchStage(EStage::Initialization, EStage::WaitData); + AFL_VERIFY(!ScanActorId); + auto& msg = ev->Get()->Record; + ScanActorId = ActorIdFromProto(msg.GetScanActorId()); + TBase::Send(*ScanActorId, new 
NKqp::TEvKqpCompute::TEvScanDataAck(FreeSpace, (ui64)ShardTabletId, 1)); + } + + void HandleExecute(NEvents::TEvExportCursorSaved::TPtr& /*ev*/) { + SwitchStage(EStage::WaitSaveCursor, EStage::WaitData); + AFL_VERIFY(ScanActorId); + TBase::Send(*ScanActorId, new NKqp::TEvKqpCompute::TEvScanDataAck(FreeSpace, (ui64)ShardTabletId, 1)); + } + + void HandleExecute(NEvents::TEvExportWritingFinished::TPtr& /*ev*/) { + SwitchStage(EStage::WaitWriting, EStage::WaitSaveCursor); + AFL_VERIFY(Cursor.HasLastKey()); + Send(ShardActorId, new NEvents::TEvExportSaveCursor(ExportId, Cursor)); + if (Cursor.IsFinished()) { + PassAway(); + } + } + + void HandleExecute(NEvents::TEvExportWritingFailed::TPtr& ev); + + void HandleExecute(NKqp::TEvKqpCompute::TEvScanData::TPtr& ev); + + void HandleExecute(NKqp::TEvKqpCompute::TEvScanError::TPtr& /*ev*/) { + AFL_VERIFY(false); + } +public: + + TActor(const TActorId& shardActorId, const TTabletId shardTabletId, const NArrow::NSerialization::TSerializerContainer& serializer, const TSelectorContainer& selector, + const std::shared_ptr& bOperator, const TIdentifier& id, const TCursor& startCursor) + : ExportId(id) + , ShardActorId(shardActorId) + , ShardTabletId(shardTabletId) + , Serializer(serializer) + , Selector(selector) + , BlobsOperator(bOperator) + , Cursor(startCursor) + { + AFL_VERIFY(serializer); + AFL_VERIFY(selector); + } + + void Bootstrap() { + auto evStart = Selector->BuildRequestInitiator(Cursor); + evStart->Record.SetGeneration((ui64)ShardTabletId); + Send(ShardActorId, evStart.release()); + Become(&TActor::StateFunc); + } + + STATEFN(StateFunc) { + try { + switch (ev->GetTypeRewrite()) { + hFunc(NKqp::TEvKqpCompute::TEvScanInitActor, HandleExecute); + hFunc(NKqp::TEvKqpCompute::TEvScanData, HandleExecute); + hFunc(NKqp::TEvKqpCompute::TEvScanError, HandleExecute); + hFunc(NEvents::TEvExportCursorSaved, HandleExecute); + hFunc(NEvents::TEvExportWritingFinished, HandleExecute); + hFunc(NEvents::TEvExportWritingFailed, 
HandleExecute); + default: + AFL_VERIFY(false)("event_type", ev->GetTypeName()); + } + } catch (...) { + AFL_VERIFY(false); + } + } + + +}; + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/export/actor/write.cpp b/ydb/core/tx/columnshard/export/actor/write.cpp new file mode 100644 index 000000000000..4b48104b5601 --- /dev/null +++ b/ydb/core/tx/columnshard/export/actor/write.cpp @@ -0,0 +1,26 @@ +#include "write.h" +#include + +namespace NKikimr::NOlap::NExport { + +void TWriteController::DoOnReadyResult(const NActors::TActorContext& ctx, const NColumnShard::TBlobPutResult::TPtr& putResult) { + if (putResult->GetPutStatus() == NKikimrProto::OK) { + ctx.Send(ExportActorId, new NEvents::TEvExportWritingFinished()); + } else { + ctx.Send(ExportActorId, new NEvents::TEvExportWritingFailed()); + } +} + +TWriteController::TWriteController(const TActorId& exportActorId, const std::vector& blobsToWrite, const std::shared_ptr& writeAction, + const TCursor& cursor, const TTabletId tabletId, const ui64 pathId) + : ExportActorId(exportActorId) +{ + for (auto&& i : blobsToWrite) { + auto blobId = TUnifiedBlobId((ui64)tabletId, (pathId << 24) >> 40, pathId >> 40, cursor.GetChunkIdx(), pathId & Max(), Max(), i.size()); + AFL_VERIFY((((ui64)blobId.GetLogoBlobId().Step() >> 8) << 40) + ((ui64)blobId.GetLogoBlobId().Generation() << 8) + blobId.GetLogoBlobId().Channel() == pathId); + auto info = NOlap::TBlobWriteInfo::BuildWriteTask(i, writeAction, blobId); + AddWriteTask(std::move(info)); + } +} + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/export/actor/write.h b/ydb/core/tx/columnshard/export/actor/write.h new file mode 100644 index 000000000000..ac5a28add2d2 --- /dev/null +++ b/ydb/core/tx/columnshard/export/actor/write.h @@ -0,0 +1,17 @@ +#pragma once +#include +#include + +namespace NKikimr::NOlap::NExport { + +class TWriteController: public NColumnShard::IWriteController { +private: + const TActorId ExportActorId; +protected: + 
virtual void DoOnReadyResult(const NActors::TActorContext& ctx, const NColumnShard::TBlobPutResult::TPtr& putResult); +public: + TWriteController(const TActorId& exportActorId, const std::vector& blobsToWrite, const std::shared_ptr& writeAction, + const TCursor& cursor, const TTabletId tabletId, const ui64 pathId); +}; + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/export/actor/ya.make b/ydb/core/tx/columnshard/export/actor/ya.make new file mode 100644 index 000000000000..ce9e06a7390f --- /dev/null +++ b/ydb/core/tx/columnshard/export/actor/ya.make @@ -0,0 +1,16 @@ +LIBRARY() + +SRCS( + export_actor.cpp + write.cpp +) + +PEERDIR( + ydb/core/tx/columnshard/blobs_action/abstract + ydb/library/actors/core + ydb/core/tx/columnshard/engines/writer + ydb/core/tx/columnshard/export/events + ydb/core/kqp/compute_actor +) + +END() diff --git a/ydb/core/tx/columnshard/export/common/identifier.cpp b/ydb/core/tx/columnshard/export/common/identifier.cpp new file mode 100644 index 000000000000..647c72e846a0 --- /dev/null +++ b/ydb/core/tx/columnshard/export/common/identifier.cpp @@ -0,0 +1,49 @@ +#include "identifier.h" +#include +#include +#include +#include + +namespace NKikimr::NOlap::NExport { + +NKikimr::TConclusionStatus TIdentifier::DeserializeFromProto(const NKikimrColumnShardExportProto::TIdentifier& proto) { + PathId = proto.GetPathId(); + if (!PathId) { + return TConclusionStatus::Fail("Incorrect pathId (zero)"); + } + return TConclusionStatus::Success(); +} + +NKikimr::TConclusion TIdentifier::BuildFromProto(const NKikimrColumnShardExportProto::TIdentifier& proto) { + TIdentifier result; + auto parseResult = result.DeserializeFromProto(proto); + if (!parseResult) { + return parseResult; + } + return result; +} + +NKikimr::TConclusion TIdentifier::BuildFromProto(const NKikimrTxColumnShard::TBackupTxBody& proto) { + TIdentifier result; + result.PathId = proto.GetBackupTask().GetTableId(); + if (!result.PathId) { + return 
TConclusionStatus::Fail("incorrect pathId (cannot been zero)"); + } + return result; +} + +NKikimrColumnShardExportProto::TIdentifier TIdentifier::SerializeToProto() const { + NKikimrColumnShardExportProto::TIdentifier result; + result.SetPathId(PathId); + return result; +} + +TString TIdentifier::DebugString() const { + return SerializeToProto().DebugString(); +} + +TString TIdentifier::ToString() const { + return TStringBuilder() << "path_id=" << PathId << ";"; +} + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/export/common/identifier.h b/ydb/core/tx/columnshard/export/common/identifier.h new file mode 100644 index 000000000000..6db80e809a2d --- /dev/null +++ b/ydb/core/tx/columnshard/export/common/identifier.h @@ -0,0 +1,48 @@ +#pragma once +#include +#include +#include +#include + +namespace NKikimrColumnShardExportProto { +class TIdentifier; +} + +namespace NKikimrTxColumnShard { +class TBackupTxBody; +} + +namespace NKikimr::NOlap::NExport { + +class TIdentifier { +private: + YDB_READONLY(ui64, PathId, 0); + + TIdentifier() = default; + TConclusionStatus DeserializeFromProto(const NKikimrColumnShardExportProto::TIdentifier& proto); +public: + TIdentifier(const ui64 pathId) + : PathId(pathId) + { + + } + + static TConclusion BuildFromProto(const NKikimrTxColumnShard::TBackupTxBody& proto); + static TConclusion BuildFromProto(const NKikimrColumnShardExportProto::TIdentifier& proto); + + NKikimrColumnShardExportProto::TIdentifier SerializeToProto() const; + + TString ToString() const; + + operator size_t() const { + return PathId; + } + + bool operator==(const TIdentifier& id) const { + return PathId == id.PathId; + } + + TString DebugString() const; +}; + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/export/common/ya.make b/ydb/core/tx/columnshard/export/common/ya.make new file mode 100644 index 000000000000..7b6bfba407dd --- /dev/null +++ b/ydb/core/tx/columnshard/export/common/ya.make @@ -0,0 +1,13 @@ +LIBRARY() + 
+SRCS( + identifier.cpp +) + +PEERDIR( + ydb/core/tx/columnshard/export/protos + ydb/library/conclusion + ydb/core/protos +) + +END() diff --git a/ydb/core/tx/columnshard/export/events/events.cpp b/ydb/core/tx/columnshard/export/events/events.cpp new file mode 100644 index 000000000000..1227faa871bb --- /dev/null +++ b/ydb/core/tx/columnshard/export/events/events.cpp @@ -0,0 +1,5 @@ +#include "events.h" + +namespace NKikimr::NOlap::NExport::NEvents { + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/export/events/events.h b/ydb/core/tx/columnshard/export/events/events.h new file mode 100644 index 000000000000..a8c6241583ae --- /dev/null +++ b/ydb/core/tx/columnshard/export/events/events.h @@ -0,0 +1,38 @@ +#pragma once +#include +#include +#include +#include + +namespace NKikimr::NOlap::NExport::NEvents { + +struct TEvExportWritingFinished: public TEventLocal { +}; + +struct TEvExportWritingFailed: public TEventLocal { +}; + +struct TEvExportCursorSaved: public TEventLocal { +}; + +class TEvExportSaveCursor: public TEventLocal { +private: + TIdentifier Identifier; + TCursor Cursor; +public: + const TIdentifier& GetIdentifier() const { + return Identifier; + } + + TCursor DetachCursor() { + return std::move(Cursor); + } + + TEvExportSaveCursor(const TIdentifier& id, const TCursor& cursor) + : Identifier(id) + , Cursor(cursor) { + + } +}; + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/export/events/ya.make b/ydb/core/tx/columnshard/export/events/ya.make new file mode 100644 index 000000000000..136aa89b147f --- /dev/null +++ b/ydb/core/tx/columnshard/export/events/ya.make @@ -0,0 +1,13 @@ +LIBRARY() + +SRCS( + events.cpp +) + +PEERDIR( + ydb/core/base + ydb/core/tx/columnshard/export/common + ydb/core/tx/columnshard/export/session +) + +END() diff --git a/ydb/core/tx/columnshard/export/manager/manager.cpp b/ydb/core/tx/columnshard/export/manager/manager.cpp new file mode 100644 index 000000000000..9735ae0b73e6 --- /dev/null 
+++ b/ydb/core/tx/columnshard/export/manager/manager.cpp @@ -0,0 +1,67 @@ +#include "manager.h" +#include +#include +#include + +namespace NKikimr::NOlap::NExport { + +void TExportsManager::Start(const NColumnShard::TColumnShard* shard) { + for (auto&& i : Sessions) { + if (i.second->IsConfirmed()) { + AFL_VERIFY(i.second->Start(shard->GetStoragesManager(), (TTabletId)shard->TabletID(), shard->SelfId())); + } + } +} + +bool TExportsManager::Load(NTable::TDatabase& database) { + NIceDb::TNiceDb db(database); + using namespace NColumnShard; + { + auto rowset = db.Table().Select(); + if (!rowset.IsReady()) { + return false; + } + + while (!rowset.EndOfSet()) { + NKikimrColumnShardExportProto::TExportTask taskProto; + AFL_VERIFY(taskProto.ParseFromString(rowset.GetValue())); + auto task = TExportTask::BuildFromProto(taskProto); + AFL_VERIFY(task.IsSuccess())("error", task.GetErrorMessage()); + + NKikimrColumnShardExportProto::TCursor cursorProto; + AFL_VERIFY(cursorProto.ParseFromString(rowset.GetValue())); + auto cursor = TCursor::BuildFromProto(cursorProto); + AFL_VERIFY(cursor.IsSuccess())("error", cursor.GetErrorMessage()); + + TSession::EStatus status; + AFL_VERIFY(TryFromString(rowset.GetValue(), status)); + + auto session = std::make_shared(std::make_shared(task.DetachResult()), status, cursor.DetachResult()); + + AFL_VERIFY(Sessions.emplace(session->GetIdentifier(), session).second); + if (!rowset.Next()) { + return false; + } + } + + } + return true; +} + +void TExportsManager::RemoveSession(const NExport::TIdentifier& id, NTabletFlatExecutor::TTransactionContext& txc) { + auto session = GetSessionOptional(id); + if (session) { + AFL_VERIFY(session->IsDraft()); + } + Sessions.erase(id); + NIceDb::TNiceDb db(txc.DB); + db.Table().Key(id.ToString()).Delete(); +} + +void TExportsManager::Stop() { + for (auto&& i : Sessions) { + i.second->Stop(); + } +} + +} diff --git a/ydb/core/tx/columnshard/export/manager/manager.h 
b/ydb/core/tx/columnshard/export/manager/manager.h new file mode 100644 index 000000000000..28cec4edb310 --- /dev/null +++ b/ydb/core/tx/columnshard/export/manager/manager.h @@ -0,0 +1,58 @@ +#pragma once +#include +#include + +namespace NKikimr::NColumnShard { +class TColumnShard; +} + +namespace NKikimr::NOlap::NExport { + + class TExportsManager { + private: + THashMap> Sessions; + public: + void Start(const NColumnShard::TColumnShard* shard); + void Stop(); + + TConclusionStatus ProposeTask(const std::shared_ptr& exportTask) { + auto it = Sessions.find(exportTask->GetIdentifier()); + if (it != Sessions.end()) { + return TConclusionStatus::Fail("task identifier exists already"); + } + Sessions.emplace(exportTask->GetIdentifier(), std::make_shared(exportTask)); + return TConclusionStatus::Success(); + } + + bool ConfirmSessionOnExecute(const NExport::TIdentifier& id, NTabletFlatExecutor::TTransactionContext& txc) { + auto session = GetSessionVerified(id); + AFL_VERIFY(session->IsDraft()); + session->SaveFullToDB(txc.DB); + return true; + } + + bool ConfirmSessionOnComplete(const NExport::TIdentifier& id) { + GetSessionVerified(id)->Confirm(); + return true; + } + + std::shared_ptr GetSessionOptional(const NExport::TIdentifier& id) const { + auto it = Sessions.find(id); + if (it == Sessions.end()) { + return nullptr; + } + return it->second; + } + + std::shared_ptr GetSessionVerified(const NExport::TIdentifier& id) const { + auto result = GetSessionOptional(id); + AFL_VERIFY(result); + return result; + } + + void RemoveSession(const NExport::TIdentifier& id, NTabletFlatExecutor::TTransactionContext& txc); + + bool Load(NTable::TDatabase& database); + + }; +} diff --git a/ydb/core/tx/columnshard/export/manager/ya.make b/ydb/core/tx/columnshard/export/manager/ya.make new file mode 100644 index 000000000000..4ec61ce42f7f --- /dev/null +++ b/ydb/core/tx/columnshard/export/manager/ya.make @@ -0,0 +1,14 @@ +LIBRARY() + +SRCS( + manager.cpp +) + +PEERDIR( + 
ydb/core/protos + ydb/core/tx/columnshard/blobs_action + ydb/core/tx/columnshard/export/session + ydb/core/tx/columnshard/export/protos +) + +END() diff --git a/ydb/core/tx/columnshard/export/protos/cursor.proto b/ydb/core/tx/columnshard/export/protos/cursor.proto new file mode 100644 index 000000000000..a57ea864a308 --- /dev/null +++ b/ydb/core/tx/columnshard/export/protos/cursor.proto @@ -0,0 +1,7 @@ +package NKikimrColumnShardExportProto; + +message TCursor { + optional string LastKey = 1; + optional bool Finished = 2; + optional uint32 ChunkIdx = 3; +} diff --git a/ydb/core/tx/columnshard/export/protos/selector.proto b/ydb/core/tx/columnshard/export/protos/selector.proto new file mode 100644 index 000000000000..51e19b7e3f72 --- /dev/null +++ b/ydb/core/tx/columnshard/export/protos/selector.proto @@ -0,0 +1,17 @@ +import "ydb/core/tx/columnshard/common/protos/snapshot.proto"; + +package NKikimrColumnShardExportProto; + +message TBackupSelector { + optional NKikimrColumnShardProto.TSnapshot Snapshot = 1; + optional string TableName = 2; + optional uint64 TablePathId = 3; +} + +message TSelectorContainer { + optional string ClassName = 1; + + oneof Implementation { + TBackupSelector Backup = 40; + } +} diff --git a/ydb/core/tx/columnshard/export/protos/storage.proto b/ydb/core/tx/columnshard/export/protos/storage.proto new file mode 100644 index 000000000000..cb3962f82963 --- /dev/null +++ b/ydb/core/tx/columnshard/export/protos/storage.proto @@ -0,0 +1,21 @@ +import "ydb/core/protos/flat_scheme_op.proto"; + +package NKikimrColumnShardExportProto; + +message TTierStorageInitializer { + optional string TierName = 1; +} + +message TExternalS3StorageInitializer { + optional string StorageName = 1; + optional NKikimrSchemeOp.TS3Settings Settings = 2; +} + +message TStorageInitializerContainer { + optional string ClassName = 1; + + oneof Implementation { + TTierStorageInitializer Tier = 40; + TExternalS3StorageInitializer ExternalS3 = 41; + } +} diff --git 
a/ydb/core/tx/columnshard/export/protos/task.proto b/ydb/core/tx/columnshard/export/protos/task.proto new file mode 100644 index 000000000000..7c8e9d60f5ef --- /dev/null +++ b/ydb/core/tx/columnshard/export/protos/task.proto @@ -0,0 +1,16 @@ +import "ydb/core/tx/columnshard/export/protos/selector.proto"; +import "ydb/core/tx/columnshard/export/protos/storage.proto"; +import "ydb/core/protos/flat_scheme_op.proto"; + +package NKikimrColumnShardExportProto; + +message TIdentifier { + optional uint64 PathId = 1; +} + +message TExportTask { + optional TIdentifier Identifier = 1; + optional TSelectorContainer Selector = 2; + optional TStorageInitializerContainer StorageInitializer = 3; + optional NKikimrSchemeOp.TOlapColumn.TSerializer Serializer = 4; +} diff --git a/ydb/core/tx/columnshard/export/protos/ya.make b/ydb/core/tx/columnshard/export/protos/ya.make new file mode 100644 index 000000000000..602d4aafb660 --- /dev/null +++ b/ydb/core/tx/columnshard/export/protos/ya.make @@ -0,0 +1,15 @@ +PROTO_LIBRARY() + +SRCS( + cursor.proto + selector.proto + storage.proto + task.proto +) + +PEERDIR( + ydb/core/tx/columnshard/common/protos + ydb/core/protos +) + +END() diff --git a/ydb/core/tx/columnshard/export/session/cursor.cpp b/ydb/core/tx/columnshard/export/session/cursor.cpp new file mode 100644 index 000000000000..c154162767ec --- /dev/null +++ b/ydb/core/tx/columnshard/export/session/cursor.cpp @@ -0,0 +1,36 @@ +#include "cursor.h" +#include + +namespace NKikimr::NOlap::NExport { + +NKikimr::TConclusionStatus TCursor::DeserializeFromProto(const NKikimrColumnShardExportProto::TCursor& proto) { /* Restores cursor state from its proto; fields absent in the proto keep their in-class defaults. Always returns Success. */ + if (proto.HasLastKey()) { + LastKey = TOwnedCellVec(TSerializedCellVec(proto.GetLastKey()).GetCells()); + } + if (proto.HasFinished()) { + Finished = proto.GetFinished(); + } + if (proto.HasChunkIdx()) { ChunkIdx = proto.GetChunkIdx(); } /* an unset optional reads as 0, which would override the in-class default of 1 */ + return TConclusionStatus::Success(); +} + +NKikimr::TConclusion TCursor::BuildFromProto(const NKikimrColumnShardExportProto::TCursor& proto) { + TCursor result; + auto 
parsedResult = result.DeserializeFromProto(proto); + if (!parsedResult) { + return parsedResult; + } + return result; +} + +NKikimrColumnShardExportProto::TCursor TCursor::SerializeToProto() const { + NKikimrColumnShardExportProto::TCursor result; + if (LastKey) { + result.SetLastKey(TSerializedCellVec::Serialize(*LastKey)); + } + result.SetFinished(Finished); + result.SetChunkIdx(ChunkIdx); + return result; +} + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/export/session/cursor.h b/ydb/core/tx/columnshard/export/session/cursor.h new file mode 100644 index 000000000000..8e3ee12d0d33 --- /dev/null +++ b/ydb/core/tx/columnshard/export/session/cursor.h @@ -0,0 +1,55 @@ +#pragma once +#include +#include +#include + +namespace NKikimrColumnShardExportProto { +class TCursor; +} + +namespace NKikimr::NOlap::NExport { + +class TCursor { +private: + ui32 ChunkIdx = 1; + std::optional LastKey; + bool Finished = false; + + [[nodiscard]] TConclusionStatus DeserializeFromProto(const NKikimrColumnShardExportProto::TCursor& proto); +public: + TCursor() = default; + TCursor(const TOwnedCellVec& lastKey, const bool finished) + : LastKey(lastKey) + , Finished(finished) + { + + } + + const std::optional& GetLastKey() const { + return LastKey; + } + + ui32 GetChunkIdx() const { + return ChunkIdx; + } + + bool HasLastKey() const { + return !!LastKey; + } + + bool IsFinished() const { + return Finished; + } + + void InitNext(const TOwnedCellVec& lastKey, const bool finished) { + ++ChunkIdx; + LastKey = lastKey; + Finished = finished; + } + + static TConclusion BuildFromProto(const NKikimrColumnShardExportProto::TCursor& proto); + + NKikimrColumnShardExportProto::TCursor SerializeToProto() const; +}; + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/export/session/selector/abstract/selector.cpp b/ydb/core/tx/columnshard/export/session/selector/abstract/selector.cpp new file mode 100644 index 000000000000..aeed23ff2e62 --- /dev/null +++ 
b/ydb/core/tx/columnshard/export/session/selector/abstract/selector.cpp @@ -0,0 +1,16 @@ +#include "selector.h" +#include +#include +#include + +namespace NKikimr::NOlap::NExport { + +NKikimr::TConclusion TSelectorContainer::BuildFromProto(const NKikimrTxColumnShard::TBackupTxBody& proto) { + auto parsed = TBackupSelector::BuildFromProto(proto.GetBackupTask()); + if (!parsed) { + return parsed.GetError(); + } + return TSelectorContainer(std::make_shared(parsed.DetachResult())); +} + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/export/session/selector/abstract/selector.h b/ydb/core/tx/columnshard/export/session/selector/abstract/selector.h new file mode 100644 index 000000000000..78a8268544a0 --- /dev/null +++ b/ydb/core/tx/columnshard/export/session/selector/abstract/selector.h @@ -0,0 +1,62 @@ +#pragma once +#include +#include +#include +#include +#include +#include + +namespace NKikimrTxColumnShard { +class TBackupTxBody; +} + +namespace NKikimr::NOlap::NExport { + +class ISelector { +protected: + virtual TConclusionStatus DoDeserializeFromProto(const NKikimrColumnShardExportProto::TSelectorContainer& proto) = 0; + virtual void DoSerializeToProto(NKikimrColumnShardExportProto::TSelectorContainer& proto) const = 0; + virtual std::unique_ptr DoBuildRequestInitiator(const TCursor& cursor) const = 0; + +public: + using TProto = NKikimrColumnShardExportProto::TSelectorContainer; + using TFactory = NObjectFactory::TObjectFactory; + + virtual ~ISelector() = default; + TConclusionStatus DeserializeFromProto(const NKikimrColumnShardExportProto::TSelectorContainer& proto) { + return DoDeserializeFromProto(proto); + } + + std::unique_ptr BuildRequestInitiator(const TCursor& cursor) const { + return DoBuildRequestInitiator(cursor); + } + + void SerializeToProto(NKikimrColumnShardExportProto::TSelectorContainer& proto) const { + DoSerializeToProto(proto); + } + + virtual ui64 GetPathId() const = 0; + virtual TString GetClassName() const = 0; +}; + 
+class TSelectorContainer: public NBackgroundTasks::TInterfaceProtoContainer { +private: + using TBase = NBackgroundTasks::TInterfaceProtoContainer; +public: + using TBase::TBase; + + static TConclusion BuildFromProto(const NKikimrColumnShardExportProto::TSelectorContainer& proto) { + TSelectorContainer result; + if (!result.DeserializeFromProto(proto)) { + return TConclusionStatus::Fail("cannot parse proto as TSelectorContainer"); + } + return result; + } + + static TConclusion BuildFromProto(const NKikimrTxColumnShard::TBackupTxBody& proto); + + TString DebugString() const { + return TBase::SerializeToProto().DebugString(); + } +}; +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/export/session/selector/abstract/ya.make b/ydb/core/tx/columnshard/export/session/selector/abstract/ya.make new file mode 100644 index 000000000000..f0d43e38008d --- /dev/null +++ b/ydb/core/tx/columnshard/export/session/selector/abstract/ya.make @@ -0,0 +1,14 @@ +LIBRARY() + +SRCS( + selector.cpp +) + +PEERDIR( + ydb/core/tx/columnshard/export/protos + ydb/services/bg_tasks/abstract + ydb/library/conclusion + ydb/core/protos +) + +END() diff --git a/ydb/core/tx/columnshard/export/session/selector/backup/selector.cpp b/ydb/core/tx/columnshard/export/session/selector/backup/selector.cpp new file mode 100644 index 000000000000..319c94a473c9 --- /dev/null +++ b/ydb/core/tx/columnshard/export/session/selector/backup/selector.cpp @@ -0,0 +1,30 @@ +#include "selector.h" +#include + +namespace NKikimr::NOlap::NExport { + +std::unique_ptr TBackupSelector::DoBuildRequestInitiator(const TCursor& cursor) const { + auto ev = std::make_unique(); + ev->Record.SetLocalPathId(TablePathId); + + auto protoRanges = ev->Record.MutableRanges(); + + if (cursor.HasLastKey()) { + auto* newRange = protoRanges->Add(); + TSerializedTableRange(TSerializedCellVec::Serialize(*cursor.GetLastKey()), {}, false, false).Serialize(*newRange); + } + + 
ev->Record.MutableSnapshot()->SetStep(Snapshot.GetPlanStep()); + ev->Record.MutableSnapshot()->SetTxId(Snapshot.GetTxId()); + ev->Record.SetStatsMode(NYql::NDqProto::EDqStatsMode::DQ_STATS_MODE_NONE); + ev->Record.SetScanId(TablePathId); + ev->Record.SetTxId(TablePathId); + ev->Record.SetTablePath(TableName); + ev->Record.SetSchemaVersion(0); + + ev->Record.SetReverse(false); + ev->Record.SetDataFormat(NKikimrDataEvents::EDataFormat::FORMAT_ARROW); + return ev; +} + +} diff --git a/ydb/core/tx/columnshard/export/session/selector/backup/selector.h b/ydb/core/tx/columnshard/export/session/selector/backup/selector.h new file mode 100644 index 000000000000..2ec27a23ccb1 --- /dev/null +++ b/ydb/core/tx/columnshard/export/session/selector/backup/selector.h @@ -0,0 +1,80 @@ +#pragma once +#include +#include +#include + +namespace NKikimr::NOlap::NExport { + +class TBackupSelector: public ISelector { /* ISelector implementation registered in TFactory under the class name "BACKUP"; holds snapshot, table name and table path id. */ +public: + static TString GetClassNameStatic() { + return "BACKUP"; + } +private: + TSnapshot Snapshot = TSnapshot::Zero(); + TString TableName; + ui64 TablePathId = 0; /* 0 means "not set": Validate() rejects it, and constructors that do not assign it no longer leave it indeterminate */ + static inline const TFactory::TRegistrator Registrator = TFactory::TRegistrator(GetClassNameStatic()); + + TConclusionStatus Validate() const { /* Fails on an invalid snapshot, a zero path id or an empty table name. */ + if (!Snapshot.Valid()) { + return TConclusionStatus::Fail("invalid snapshot"); + } + if (!TablePathId) { + return TConclusionStatus::Fail("invalid path id"); + } + if (!TableName) { + return TConclusionStatus::Fail("invalid table name"); + } + return TConclusionStatus::Success(); + } +protected: + virtual std::unique_ptr DoBuildRequestInitiator(const TCursor& cursor) const override; + + virtual TConclusionStatus DoDeserializeFromProto(const NKikimrColumnShardExportProto::TSelectorContainer& proto) override { + auto result = Snapshot.DeserializeFromProto(proto.GetBackup().GetSnapshot()); + if (!result) { + return result; + } + TableName = proto.GetBackup().GetTableName(); + TablePathId = proto.GetBackup().GetTablePathId(); + return Validate(); + } + + virtual void 
DoSerializeToProto(NKikimrColumnShardExportProto::TSelectorContainer& proto) const override { + *proto.MutableBackup()->MutableSnapshot() = Snapshot.SerializeToProto(); + proto.MutableBackup()->SetTablePathId(TablePathId); + proto.MutableBackup()->SetTableName(TableName); + } + + TConclusionStatus DeserializeFromProto(const NKikimrSchemeOp::TBackupTask& proto) { + Snapshot = TSnapshot(proto.GetSnapshotStep(), proto.GetSnapshotTxId()); + TableName = proto.GetTableName(); + TablePathId = proto.GetTableId(); + return Validate(); + } +public: + TBackupSelector() = default; + TBackupSelector(const TSnapshot& snapshot) + : Snapshot(snapshot) { + + } + + virtual ui64 GetPathId() const override { + return TablePathId; + } + + static TConclusion BuildFromProto(const NKikimrSchemeOp::TBackupTask& proto) { + TBackupSelector result; + auto parsed = result.DeserializeFromProto(proto); + if (!parsed) { + return parsed; + } + return result; + } + + virtual TString GetClassName() const override { + return GetClassNameStatic(); + } +}; +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/export/session/selector/backup/ya.make b/ydb/core/tx/columnshard/export/session/selector/backup/ya.make new file mode 100644 index 000000000000..e941fe19565e --- /dev/null +++ b/ydb/core/tx/columnshard/export/session/selector/backup/ya.make @@ -0,0 +1,13 @@ +LIBRARY() + +SRCS( + GLOBAL selector.cpp +) + +PEERDIR( + ydb/core/tx/columnshard/export/session/selector/abstract + ydb/core/protos + ydb/library/yql/dq/actors/protos +) + +END() diff --git a/ydb/core/tx/columnshard/export/session/selector/ya.make b/ydb/core/tx/columnshard/export/session/selector/ya.make new file mode 100644 index 000000000000..8b8446f1abba --- /dev/null +++ b/ydb/core/tx/columnshard/export/session/selector/ya.make @@ -0,0 +1,11 @@ +LIBRARY() + +SRCS( +) + +PEERDIR( + ydb/core/tx/columnshard/export/session/selector/abstract + ydb/core/tx/columnshard/export/session/selector/backup +) + +END() diff --git 
a/ydb/core/tx/columnshard/export/session/session.cpp b/ydb/core/tx/columnshard/export/session/session.cpp new file mode 100644 index 000000000000..eb027a295ae0 --- /dev/null +++ b/ydb/core/tx/columnshard/export/session/session.cpp @@ -0,0 +1,65 @@ +#include "session.h" +#include +#include +#include +#include +#include +#include +#include + +namespace NKikimr::NOlap::NExport { + +TConclusion> TSession::SaveCursorTx(NColumnShard::TColumnShard* shard, TCursor&& newCursor, const std::shared_ptr& selfPtr) const { + AFL_VERIFY(IsStarted()); + AFL_VERIFY(ExportActorId); + return std::unique_ptr(new TTxSaveCursor(shard, selfPtr, std::move(newCursor), *ExportActorId)); +} + +void TSession::Stop() { + if (IsStarted()) { + AFL_VERIFY(ExportActorId); + NActors::TActivationContext::AsActorContext().Send(*ExportActorId, new TEvents::TEvPoisonPill); + ExportActorId = {}; + } +} + +bool TSession::Start(const std::shared_ptr& storages, const TTabletId tabletId, const TActorId& tabletActorId) { + AFL_VERIFY(IsConfirmed()); + auto blobsOperator = Task->GetStorageInitializer()->InitializeOperator(storages); + if (!blobsOperator) { + AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("event", "problem_on_export_start")("reason", "cannot_initialize_operator")("problem", blobsOperator.GetErrorMessage()); + return false; + } + AFL_VERIFY(!ExportActorId); + ExportActorId = NActors::TActivationContext::AsActorContext().Register(new TActor(tabletActorId, tabletId, + Task->GetSerializer(), Task->GetSelector(), blobsOperator.DetachResult(), Task->GetIdentifier(), Cursor)); + Status = EStatus::Started; + return true; +} + +void TSession::SaveFullToDB(NTable::TDatabase& tdb) { + using namespace NColumnShard; + NIceDb::TNiceDb db(tdb); + db.Table().Key(Task->GetIdentifier().ToString()).Update( + NIceDb::TUpdate(::ToString(Status)), + NIceDb::TUpdate(Cursor.SerializeToProto().SerializeAsString()), + NIceDb::TUpdate(Task->SerializeToProto().SerializeAsString()) + ); +} + +void 
TSession::SaveCursorToDB(NTable::TDatabase& tdb) { + using namespace NColumnShard; + NIceDb::TNiceDb db(tdb); + if (Status != EStatus::Started) { + db.Table().Key(Task->GetIdentifier().ToString()).Update( + NIceDb::TUpdate(::ToString(Status)), + NIceDb::TUpdate(Cursor.SerializeToProto().SerializeAsString()) + ); + } else { + db.Table().Key(Task->GetIdentifier().ToString()).Update( + NIceDb::TUpdate(Cursor.SerializeToProto().SerializeAsString()) + ); + } +} + +} diff --git a/ydb/core/tx/columnshard/export/session/session.h b/ydb/core/tx/columnshard/export/session/session.h new file mode 100644 index 000000000000..67beeb5c74dc --- /dev/null +++ b/ydb/core/tx/columnshard/export/session/session.h @@ -0,0 +1,98 @@ +#pragma once +#include "task.h" +#include "cursor.h" +#include +#include + +namespace NKikimr::NColumnShard { +class TColumnShard; +} + +namespace NKikimr::NTabletFlatExecutor { +class ITransaction; +} + +namespace NKikimr::NOlap { +class IStoragesManager; +} + +namespace NKikimr::NOlap::NExport { + class TSession { + public: + enum class EStatus: ui64 { + Draft = 0 /*"draft"*/, + Confirmed = 1 /*"confirmed"*/, + Started = 2 /*"started"*/, + Finished = 3 /*"finished"*/ + }; + + private: + std::shared_ptr Task; + EStatus Status = EStatus::Draft; + TCursor Cursor; + std::optional ExportActorId; + public: + void SetCursor(const TCursor& cursor) { + Cursor = cursor; + if (Cursor.IsFinished()) { + Finish(); + } + } + + TSession(const std::shared_ptr& task) + : Task(task) { + AFL_VERIFY(Task); + } + + TSession(const std::shared_ptr& task, const EStatus status, TCursor&& cursor) + : Task(task) + , Status(status) + , Cursor(std::move(cursor)) { + AFL_VERIFY(Status != EStatus::Started); + AFL_VERIFY(Task); + } + + bool IsConfirmed() const { + return Status == EStatus::Confirmed; + } + + void SaveFullToDB(NTable::TDatabase& tdb); + + void SaveCursorToDB(NTable::TDatabase& tdb); + + [[nodiscard]] TConclusion> SaveCursorTx(NColumnShard::TColumnShard* shard, TCursor&& 
newCursor, const std::shared_ptr& selfPtr) const; + + const TCursor& GetCursor() const { + return Cursor; + } + + TString DebugString() const { + return TStringBuilder() << "task=" << Task->DebugString() << ";status=" << Status; + } + + bool IsDraft() const { + return Status == EStatus::Draft; + } + + void Confirm() { + AFL_VERIFY(IsDraft()); + Status = EStatus::Confirmed; + } + + bool IsStarted() const { + return Status == EStatus::Started; + } + + const TIdentifier& GetIdentifier() const { + return Task->GetIdentifier(); + } + + [[nodiscard]] bool Start(const std::shared_ptr& storages, const TTabletId tabletId, const TActorId& tabletActorId); + void Stop(); + void Finish() { + AFL_VERIFY(Status == EStatus::Started); + Status = EStatus::Finished; + } + + }; +} diff --git a/ydb/core/tx/columnshard/export/session/storage/abstract/storage.cpp b/ydb/core/tx/columnshard/export/session/storage/abstract/storage.cpp new file mode 100644 index 000000000000..f7cc791f14dc --- /dev/null +++ b/ydb/core/tx/columnshard/export/session/storage/abstract/storage.cpp @@ -0,0 +1,15 @@ +#include "storage.h" +#include +#include +#include + +namespace NKikimr::NOlap::NExport { + +NKikimr::TConclusion TStorageInitializerContainer::BuildFromProto(const NKikimrTxColumnShard::TBackupTxBody& proto) { + if (!proto.GetBackupTask().HasS3Settings()) { + return TConclusionStatus::Fail("s3 settings not found in backup task"); + } + return TStorageInitializerContainer(std::make_shared("BACKUP", proto.GetBackupTask().GetS3Settings())); +} + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/export/session/storage/abstract/storage.h b/ydb/core/tx/columnshard/export/session/storage/abstract/storage.h new file mode 100644 index 000000000000..f4991e153ffb --- /dev/null +++ b/ydb/core/tx/columnshard/export/session/storage/abstract/storage.h @@ -0,0 +1,63 @@ +#pragma once +#include +#include +#include +#include + +namespace NKikimr::NOlap { +class IStoragesManager; +class 
IBlobsStorageOperator; +} + +namespace NKikimrTxColumnShard { +class TBackupTxBody; +} + +namespace NKikimr::NOlap::NExport { + +class IStorageInitializer { +protected: + virtual TConclusionStatus DoDeserializeFromProto(const NKikimrColumnShardExportProto::TStorageInitializerContainer& proto) = 0; + virtual void DoSerializeToProto(NKikimrColumnShardExportProto::TStorageInitializerContainer& proto) const = 0; + virtual TConclusion> DoInitializeOperator(const std::shared_ptr& storages) const = 0; +public: + using TProto = NKikimrColumnShardExportProto::TStorageInitializerContainer; + using TFactory = NObjectFactory::TObjectFactory; + + virtual ~IStorageInitializer() = default; + TConclusionStatus DeserializeFromProto(const NKikimrColumnShardExportProto::TStorageInitializerContainer& proto) { + return DoDeserializeFromProto(proto); + } + + TConclusion> InitializeOperator(const std::shared_ptr& storages) const { + return DoInitializeOperator(storages); + } + + void SerializeToProto(NKikimrColumnShardExportProto::TStorageInitializerContainer& proto) const { + DoSerializeToProto(proto); + } + + virtual TString GetClassName() const = 0; +}; + +class TStorageInitializerContainer: public NBackgroundTasks::TInterfaceProtoContainer { /* Proto-serializable container for storage initializers; buildable from its own container proto or from a backup tx body. */ +private: + using TBase = NBackgroundTasks::TInterfaceProtoContainer; +public: + using TBase::TBase; + + static TConclusion BuildFromProto(const NKikimrColumnShardExportProto::TStorageInitializerContainer& proto) { /* Returns the parsed container, or a failure status when the proto cannot be deserialized. */ + TStorageInitializerContainer result; + if (!result.DeserializeFromProto(proto)) { + return TConclusionStatus::Fail("cannot parse proto as TStorageInitializerContainer"); + } + return result; + } + + static TConclusion BuildFromProto(const NKikimrTxColumnShard::TBackupTxBody& proto); + + TString DebugString() const { + return TBase::SerializeToProto().DebugString(); + } +}; +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/export/session/storage/abstract/ya.make b/ydb/core/tx/columnshard/export/session/storage/abstract/ya.make 
new file mode 100644 index 000000000000..587502a776ef --- /dev/null +++ b/ydb/core/tx/columnshard/export/session/storage/abstract/ya.make @@ -0,0 +1,15 @@ +LIBRARY() + +SRCS( + storage.cpp +) + +PEERDIR( + ydb/core/tx/columnshard/export/session/storage/s3 + ydb/core/tx/columnshard/export/protos + ydb/services/bg_tasks/abstract + ydb/library/conclusion + ydb/core/protos +) + +END() diff --git a/ydb/core/tx/columnshard/export/session/storage/s3/storage.cpp b/ydb/core/tx/columnshard/export/session/storage/s3/storage.cpp new file mode 100644 index 000000000000..e3bbce9d4c92 --- /dev/null +++ b/ydb/core/tx/columnshard/export/session/storage/s3/storage.cpp @@ -0,0 +1,25 @@ +#include "storage.h" +#include +#ifndef KIKIMR_DISABLE_S3_OPS +#include +#include +#endif +#include + +namespace NKikimr::NOlap::NExport { + +NKikimr::TConclusion> TS3StorageInitializer::DoInitializeOperator(const std::shared_ptr& storages) const { +#ifndef KIKIMR_DISABLE_S3_OPS + auto extStorageConfig = NWrappers::NExternalStorage::IExternalStorageConfig::Construct(S3Settings); + if (!extStorageConfig) { + return TConclusionStatus::Fail("cannot build operator with this config: " + S3Settings.DebugString()); + } + return std::shared_ptr(new NBlobOperations::NTier::TOperator("__EXPORT:" + StorageName, NActors::TActorId(), extStorageConfig, + std::make_shared("__EXPORT:" + StorageName, storages->GetSharedBlobsManager()->GetSelfTabletId()))); +#else + Y_UNUSED(storages); + return TConclusionStatus::Fail("s3 not supported"); +#endif +} + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/export/session/storage/s3/storage.h b/ydb/core/tx/columnshard/export/session/storage/s3/storage.h new file mode 100644 index 000000000000..1713875faae5 --- /dev/null +++ b/ydb/core/tx/columnshard/export/session/storage/s3/storage.h @@ -0,0 +1,44 @@ +#pragma once +#include + +namespace NKikimr::NOlap::NExport { + +class TS3StorageInitializer: public IStorageInitializer { +public: + static TString 
GetClassNameStatic() { + return "S3"; + } +private: + TString StorageName; + NKikimrSchemeOp::TS3Settings S3Settings; + static inline const TFactory::TRegistrator Registrator = TFactory::TRegistrator(GetClassNameStatic()); +protected: + virtual TConclusion> DoInitializeOperator(const std::shared_ptr& storages) const override; +public: + TS3StorageInitializer() = default; + TS3StorageInitializer(const TString& storageName, const NKikimrSchemeOp::TS3Settings& s3Settings) + : StorageName(storageName) + , S3Settings(s3Settings) + { + + } + + virtual TConclusionStatus DoDeserializeFromProto(const NKikimrColumnShardExportProto::TStorageInitializerContainer& proto) override { + if (!proto.HasExternalS3()) { + return TConclusionStatus::Fail("has not s3 configuration"); + } + S3Settings = proto.GetExternalS3().GetSettings(); + StorageName = proto.GetExternalS3().GetStorageName(); + return TConclusionStatus::Success(); + } + + virtual void DoSerializeToProto(NKikimrColumnShardExportProto::TStorageInitializerContainer& proto) const override { + *proto.MutableExternalS3()->MutableSettings() = S3Settings; + proto.MutableExternalS3()->SetStorageName(StorageName); + } + + virtual TString GetClassName() const override { + return GetClassNameStatic(); + } +}; +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/export/session/storage/s3/ya.make b/ydb/core/tx/columnshard/export/session/storage/s3/ya.make new file mode 100644 index 000000000000..235ad58af4cb --- /dev/null +++ b/ydb/core/tx/columnshard/export/session/storage/s3/ya.make @@ -0,0 +1,23 @@ +LIBRARY() + +SRCS( + GLOBAL storage.cpp +) + +PEERDIR( + ydb/core/tx/columnshard/export/session/selector/abstract + ydb/core/tx/columnshard/blobs_action/abstract + ydb/core/wrappers +) + +IF (OS_WINDOWS) + CFLAGS( + -DKIKIMR_DISABLE_S3_OPS + ) +ELSE() + PEERDIR( + ydb/core/tx/columnshard/blobs_action/tier + ) +ENDIF() + +END() diff --git a/ydb/core/tx/columnshard/export/session/storage/tier/storage.cpp 
b/ydb/core/tx/columnshard/export/session/storage/tier/storage.cpp new file mode 100644 index 000000000000..45e2d449b3c2 --- /dev/null +++ b/ydb/core/tx/columnshard/export/session/storage/tier/storage.cpp @@ -0,0 +1,14 @@ +#include "storage.h" +#include + +namespace NKikimr::NOlap::NExport { + +NKikimr::TConclusion> TTierStorageInitializer::DoInitializeOperator(const std::shared_ptr& storages) const { + auto bOperator = storages->GetOperatorOptional(TierName); + if (!bOperator) { + return TConclusionStatus::Fail("cannot find tier with name '" + TierName + "' for export destination"); + } + return bOperator; +} + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/export/session/storage/tier/storage.h b/ydb/core/tx/columnshard/export/session/storage/tier/storage.h new file mode 100644 index 000000000000..84ea3465f2c4 --- /dev/null +++ b/ydb/core/tx/columnshard/export/session/storage/tier/storage.h @@ -0,0 +1,40 @@ +#pragma once +#include + +namespace NKikimr::NOlap::NExport { + +class TTierStorageInitializer: public IStorageInitializer { +public: + static TString GetClassNameStatic() { + return "TIER"; + } +private: + TString TierName; + static inline const TFactory::TRegistrator Registrator = TFactory::TRegistrator(GetClassNameStatic()); +protected: + virtual TConclusion> DoInitializeOperator(const std::shared_ptr& storages) const override; +public: + TTierStorageInitializer() = default; + TTierStorageInitializer(const TString& tierName) + : TierName(tierName) + { + + } + + virtual TConclusionStatus DoDeserializeFromProto(const NKikimrColumnShardExportProto::TStorageInitializerContainer& proto) override { + if (!proto.HasTier()) { + return TConclusionStatus::Fail("has not tier configuration"); + } + TierName = proto.GetTier().GetTierName(); + return TConclusionStatus::Success(); + } + + virtual void DoSerializeToProto(NKikimrColumnShardExportProto::TStorageInitializerContainer& proto) const override { + proto.MutableTier()->SetTierName(TierName); + } 
+ + virtual TString GetClassName() const override { + return GetClassNameStatic(); + } +}; +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/export/session/storage/tier/ya.make b/ydb/core/tx/columnshard/export/session/storage/tier/ya.make new file mode 100644 index 000000000000..e2d3bc839b86 --- /dev/null +++ b/ydb/core/tx/columnshard/export/session/storage/tier/ya.make @@ -0,0 +1,11 @@ +LIBRARY() + +SRCS( + GLOBAL storage.cpp +) + +PEERDIR( + ydb/core/tx/columnshard/export/session/selector/abstract +) + +END() diff --git a/ydb/core/tx/columnshard/export/session/storage/ya.make b/ydb/core/tx/columnshard/export/session/storage/ya.make new file mode 100644 index 000000000000..cd447f751650 --- /dev/null +++ b/ydb/core/tx/columnshard/export/session/storage/ya.make @@ -0,0 +1,12 @@ +LIBRARY() + +SRCS( +) + +PEERDIR( + ydb/core/tx/columnshard/export/session/storage/abstract + ydb/core/tx/columnshard/export/session/storage/s3 + ydb/core/tx/columnshard/export/session/storage/tier +) + +END() diff --git a/ydb/core/tx/columnshard/export/session/task.cpp b/ydb/core/tx/columnshard/export/session/task.cpp new file mode 100644 index 000000000000..fd396f1b0895 --- /dev/null +++ b/ydb/core/tx/columnshard/export/session/task.cpp @@ -0,0 +1,48 @@ +#include "session.h" +#include + +namespace NKikimr::NOlap::NExport { + +NKikimr::TConclusionStatus TExportTask::DeserializeFromProto(const NKikimrColumnShardExportProto::TExportTask& proto) { + auto id = TIdentifier::BuildFromProto(proto.GetIdentifier()); + if (!id) { + return id; + } + auto selector = TSelectorContainer::BuildFromProto(proto.GetSelector()); + if (!selector) { + return selector; + } + auto initializer = TStorageInitializerContainer::BuildFromProto(proto.GetStorageInitializer()); + if (!initializer) { + return initializer; + } + auto serializer = NArrow::NSerialization::TSerializerContainer::BuildFromProto(proto.GetSerializer()); + if (!serializer) { + return serializer; + } + Identifier = 
id.DetachResult(); + Selector = selector.DetachResult(); + StorageInitializer = initializer.DetachResult(); + Serializer = serializer.DetachResult(); + return TConclusionStatus::Success(); +} + +NKikimrColumnShardExportProto::TExportTask TExportTask::SerializeToProto() const { + NKikimrColumnShardExportProto::TExportTask result; + *result.MutableIdentifier() = Identifier.SerializeToProto(); + *result.MutableSelector() = Selector.SerializeToProto(); + *result.MutableStorageInitializer() = StorageInitializer.SerializeToProto(); + *result.MutableSerializer() = Serializer.SerializeToProto(); + return result; +} + +NKikimr::TConclusion TExportTask::BuildFromProto(const NKikimrColumnShardExportProto::TExportTask& proto) { + TExportTask result; + auto resultParsed = result.DeserializeFromProto(proto); + if (!resultParsed) { + return resultParsed; + } + return result; +} + +} diff --git a/ydb/core/tx/columnshard/export/session/task.h b/ydb/core/tx/columnshard/export/session/task.h new file mode 100644 index 000000000000..144305d27515 --- /dev/null +++ b/ydb/core/tx/columnshard/export/session/task.h @@ -0,0 +1,49 @@ +#pragma once +#include "selector/abstract/selector.h" +#include "storage/abstract/storage.h" + +#include +#include + +#include +#include + +namespace NKikimrColumnShardExportProto { +class TExportTask; +} + +namespace NKikimr::NOlap::NExport { + +class TExportTask { +private: + TIdentifier Identifier = TIdentifier(0); + YDB_READONLY_DEF(TSelectorContainer, Selector); + YDB_READONLY_DEF(TStorageInitializerContainer, StorageInitializer); + YDB_READONLY_DEF(NArrow::NSerialization::TSerializerContainer, Serializer); + + TExportTask() = default; + + TConclusionStatus DeserializeFromProto(const NKikimrColumnShardExportProto::TExportTask& proto); + +public: + NKikimrColumnShardExportProto::TExportTask SerializeToProto() const; + + static TConclusion BuildFromProto(const NKikimrColumnShardExportProto::TExportTask& proto); + + const TIdentifier& GetIdentifier() const { 
+ return Identifier; + } + + TExportTask(const TIdentifier& id, const TSelectorContainer& selector, const TStorageInitializerContainer& storageInitializer, const NArrow::NSerialization::TSerializerContainer& serializer) + : Identifier(id) + , Selector(selector) + , StorageInitializer(storageInitializer) + , Serializer(serializer) + { + } + + TString DebugString() const { + return TStringBuilder() << "{task_id=" << Identifier.DebugString() << ";selector=" << Selector.DebugString() << ";}"; + } +}; +} diff --git a/ydb/core/tx/columnshard/export/session/ya.make b/ydb/core/tx/columnshard/export/session/ya.make new file mode 100644 index 000000000000..a7ac2661e309 --- /dev/null +++ b/ydb/core/tx/columnshard/export/session/ya.make @@ -0,0 +1,20 @@ +LIBRARY() + +SRCS( + session.cpp + cursor.cpp + task.cpp +) + +PEERDIR( + ydb/core/tx/columnshard/export/session/selector + ydb/core/tx/columnshard/export/session/storage + ydb/core/scheme + ydb/core/tx/columnshard/export/protos + ydb/core/tablet_flat + ydb/core/tx/columnshard/export/transactions +) + +GENERATE_ENUM_SERIALIZATION(session.h) + +END() diff --git a/ydb/core/tx/columnshard/export/transactions/tx_save_cursor.cpp b/ydb/core/tx/columnshard/export/transactions/tx_save_cursor.cpp new file mode 100644 index 000000000000..159c0b492933 --- /dev/null +++ b/ydb/core/tx/columnshard/export/transactions/tx_save_cursor.cpp @@ -0,0 +1,23 @@ +#include "tx_save_cursor.h" +#include +#include + +namespace NKikimr::NOlap::NExport { + +bool TTxSaveCursor::Execute(NTabletFlatExecutor::TTransactionContext& txc, const TActorContext& /*ctx*/) { + TSession copy = *Session; + copy.SetCursor(Cursor); + copy.SaveCursorToDB(txc.DB); + return true; +} + +void TTxSaveCursor::Complete(const TActorContext& ctx) { + Session->SetCursor(Cursor); + if (!Cursor.IsFinished()) { + ctx.Send(ExportActorId, new NEvents::TEvExportCursorSaved); + } else { + NYDBTest::TControllers::GetColumnShardController()->OnExportFinished(); + } +} + +} diff --git 
a/ydb/core/tx/columnshard/export/transactions/tx_save_cursor.h b/ydb/core/tx/columnshard/export/transactions/tx_save_cursor.h new file mode 100644 index 000000000000..94023bf8a21a --- /dev/null +++ b/ydb/core/tx/columnshard/export/transactions/tx_save_cursor.h @@ -0,0 +1,26 @@ +#pragma once +#include + +namespace NKikimr::NOlap::NExport { +class TSession; +class TTxSaveCursor: public NColumnShard::TTransactionBase { +private: + using TBase = NColumnShard::TTransactionBase; + const TCursor Cursor; + const TActorId ExportActorId; + std::shared_ptr Session; +public: + TTxSaveCursor(NColumnShard::TColumnShard* self, const std::shared_ptr& session, TCursor&& cursor, const TActorId& exportActorId) + : TBase(self) + , Cursor(std::move(cursor)) + , ExportActorId(exportActorId) + , Session(session) + { + } + + bool Execute(NTabletFlatExecutor::TTransactionContext& txc, const TActorContext& /*ctx*/) override; + void Complete(const TActorContext& ctx) override; + TTxType GetTxType() const override { return NColumnShard::TXTYPE_EXPORT_SAVE_CURSOR; } +}; + +} diff --git a/ydb/core/tx/columnshard/export/transactions/ya.make b/ydb/core/tx/columnshard/export/transactions/ya.make new file mode 100644 index 000000000000..bd05fac6fa49 --- /dev/null +++ b/ydb/core/tx/columnshard/export/transactions/ya.make @@ -0,0 +1,14 @@ +LIBRARY() + +SRCS( + tx_save_cursor.cpp +) + +PEERDIR( + ydb/core/protos + ydb/core/tx/columnshard/export/protos + ydb/core/tx/columnshard/blobs_action + ydb/services/metadata/abstract +) + +END() diff --git a/ydb/core/tx/columnshard/export/ya.make b/ydb/core/tx/columnshard/export/ya.make new file mode 100644 index 000000000000..99e9dcb669d9 --- /dev/null +++ b/ydb/core/tx/columnshard/export/ya.make @@ -0,0 +1,16 @@ +LIBRARY() + +SRCS( +) + +PEERDIR( + ydb/core/tx/columnshard/export/session + ydb/core/tx/columnshard/export/manager + ydb/core/tx/columnshard/export/actor + ydb/core/tx/columnshard/export/events + ydb/core/tx/columnshard/export/protos + 
ydb/core/tx/columnshard/export/common + ydb/core/tx/columnshard/export/transactions +) + +END() diff --git a/ydb/core/tx/columnshard/hooks/abstract/abstract.h b/ydb/core/tx/columnshard/hooks/abstract/abstract.h index e9b9144cacbb..f1665ee4de71 100644 --- a/ydb/core/tx/columnshard/hooks/abstract/abstract.h +++ b/ydb/core/tx/columnshard/hooks/abstract/abstract.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include @@ -11,16 +12,18 @@ #include #include -namespace NKikimr::NOlap::NIndexedReader { -class IOrderPolicy; -} - namespace NKikimr::NColumnShard { class TTiersManager; +class TColumnShard; } namespace NKikimr::NOlap { class TColumnEngineChanges; +class IBlobsGCAction; +class TPortionInfo; +namespace NStatistics { +class TOperatorContainer; +} } namespace arrow { class RecordBatch; @@ -44,48 +47,126 @@ class ILocalDBModifier { }; class ICSController { -private: - YDB_READONLY(TAtomicCounter, OnSortingPolicyCounter, 0); +public: + enum class EBackground { + Indexation, + Compaction, + TTL, + Cleanup, + GC + }; protected: - virtual bool DoOnSortingPolicy(std::shared_ptr /*policy*/) { - return true; + virtual void DoOnTabletInitCompleted(const ::NKikimr::NColumnShard::TColumnShard& /*shard*/) { + return; + } + virtual void DoOnTabletStopped(const ::NKikimr::NColumnShard::TColumnShard& /*shard*/) { + return; } virtual bool DoOnAfterFilterAssembling(const std::shared_ptr& /*batch*/) { return true; } - virtual bool DoOnStartCompaction(std::shared_ptr& /*changes*/) { + virtual bool DoOnWriteIndexComplete(const NOlap::TColumnEngineChanges& /*changes*/, const ::NKikimr::NColumnShard::TColumnShard& /*shard*/) { return true; } - virtual bool DoOnWriteIndexComplete(const ui64 /*tabletId*/, const TString& /*changeClassName*/) { + virtual bool DoOnWriteIndexStart(const ui64 /*tabletId*/, NOlap::TColumnEngineChanges& /*change*/) { return true; } - virtual bool DoOnWriteIndexStart(const ui64 /*tabletId*/, const TString& /*changeClassName*/) { - return true; + 
virtual void DoOnAfterSharingSessionsManagerStart(const NColumnShard::TColumnShard& /*shard*/) { + } + virtual void DoOnAfterGCAction(const NColumnShard::TColumnShard& /*shard*/, const NOlap::IBlobsGCAction& /*action*/) { + } + virtual void DoOnDataSharingFinished(const ui64 /*tabletId*/, const TString& /*sessionId*/) { + } + virtual void DoOnDataSharingStarted(const ui64 /*tabletId*/, const TString & /*sessionId*/) { } public: + virtual bool IsBackgroundEnabled(const EBackground /*id*/) const { + return true; + } + using TPtr = std::shared_ptr; virtual ~ICSController() = default; - bool OnSortingPolicy(std::shared_ptr policy) { - OnSortingPolicyCounter.Inc(); - return DoOnSortingPolicy(policy); + + virtual NColumnShard::TBlobPutResult::TPtr OverrideBlobPutResultOnCompaction(const NColumnShard::TBlobPutResult::TPtr original, const NOlap::TWriteActionsCollection& /*actions*/) const { + return original; + } + + virtual ui64 GetReduceMemoryIntervalLimit(const ui64 def) const { + return def; + } + virtual ui64 GetRejectMemoryIntervalLimit(const ui64 def) const { + return def; + } + virtual bool NeedForceCompactionBacketsConstruction() const { + return false; + } + virtual ui64 GetSmallPortionSizeDetector(const ui64 def) const { + return def; + } + virtual void OnExportFinished() { + + } + virtual void OnActualizationRefreshScheme() { + + } + virtual void OnActualizationRefreshTiering() { + } + virtual void AddPortionForActualizer(const i32 /*portionsCount*/) { + + } + + void OnDataSharingFinished(const ui64 tabletId, const TString& sessionId) { + return DoOnDataSharingFinished(tabletId, sessionId); + } + void OnDataSharingStarted(const ui64 tabletId, const TString& sessionId) { + return DoOnDataSharingStarted(tabletId, sessionId); + } + virtual void OnStatisticsUsage(const NOlap::NStatistics::TOperatorContainer& /*statOperator*/) { + + } + virtual void OnPortionActualization(const NOlap::TPortionInfo& /*info*/) { + + } + virtual void OnMaxValueUsage() { + } + + virtual 
TDuration GetLagForCompactionBeforeTierings(const TDuration def) const { + return def; + } + + void OnTabletInitCompleted(const NColumnShard::TColumnShard& shard) { + DoOnTabletInitCompleted(shard); + } + + void OnTabletStopped(const NColumnShard::TColumnShard& shard) { + DoOnTabletStopped(shard); + } + + void OnAfterGCAction(const NColumnShard::TColumnShard& shard, const NOlap::IBlobsGCAction& action) { + DoOnAfterGCAction(shard, action); + } + bool OnAfterFilterAssembling(const std::shared_ptr& batch) { return DoOnAfterFilterAssembling(batch); } - bool OnWriteIndexComplete(const ui64 tabletId, const TString& changeClassName) { - return DoOnWriteIndexComplete(tabletId, changeClassName); + bool OnWriteIndexComplete(const NOlap::TColumnEngineChanges& changes, const NColumnShard::TColumnShard& shard) { + return DoOnWriteIndexComplete(changes, shard); } - bool OnWriteIndexStart(const ui64 tabletId, const TString& changeClassName) { - return DoOnWriteIndexStart(tabletId, changeClassName); + void OnAfterSharingSessionsManagerStart(const NColumnShard::TColumnShard& shard) { + return DoOnAfterSharingSessionsManagerStart(shard); } - bool OnStartCompaction(std::shared_ptr& changes) { - return DoOnStartCompaction(changes); + bool OnWriteIndexStart(const ui64 tabletId, NOlap::TColumnEngineChanges& change) { + return DoOnWriteIndexStart(tabletId, change); } virtual void OnIndexSelectProcessed(const std::optional /*result*/) { } + virtual TDuration GetReadTimeoutClean(const TDuration def) { + return def; + } virtual EOptimizerCompactionWeightControl GetCompactionControl() const { - return EOptimizerCompactionWeightControl::Force; + return EOptimizerCompactionWeightControl::Default; } virtual TDuration GetTTLDefaultWaitingDuration(const TDuration defaultValue) const { return defaultValue; @@ -117,6 +198,14 @@ class ICSController { static std::shared_ptr result = std::make_shared(TInstant::Now()); return result; } + + virtual void OnSwitchToWork(const ui64 tabletId) { + 
Y_UNUSED(tabletId); + } + + virtual void OnCleanupActors(const ui64 tabletId) { + Y_UNUSED(tabletId); + } }; class TControllers { @@ -153,6 +242,12 @@ class TControllers { static ICSController::TPtr GetColumnShardController() { return Singleton()->CSController; } + + template + static T* GetControllerAs() { + auto controller = Singleton()->CSController; + return dynamic_cast(controller.get()); + } }; } diff --git a/ydb/core/tx/columnshard/hooks/testing/controller.cpp b/ydb/core/tx/columnshard/hooks/testing/controller.cpp index 2fcf27ebaf76..b320dc6a4505 100644 --- a/ydb/core/tx/columnshard/hooks/testing/controller.cpp +++ b/ydb/core/tx/columnshard/hooks/testing/controller.cpp @@ -1,7 +1,11 @@ #include "controller.h" +#include +#include #include #include #include +#include +#include #include namespace NKikimr::NYDBTest::NColumnShard { @@ -13,16 +17,158 @@ bool TController::DoOnAfterFilterAssembling(const std::shared_ptr g(Mutex); + if (SharingIds.empty()) { + TCheckContext context; + CheckInvariants(shard, context); + } + return true; +} + +bool TController::DoOnWriteIndexStart(const ui64 tabletId, NOlap::TColumnEngineChanges& change) { + AFL_NOTICE(NKikimrServices::TX_COLUMNSHARD)("event", change.TypeString())("tablet_id", tabletId); + if (change.TypeString() == NOlap::TTTLColumnEngineChanges::StaticTypeName()) { + TTLStartedCounter.Inc(); + } + if (change.TypeString() == NOlap::TInsertColumnEngineChanges::StaticTypeName()) { + InsertStartedCounter.Inc(); + } + if (change.TypeString() == NOlap::TCompactColumnEngineChanges::StaticTypeName()) { + CompactionStartedCounter.Inc(); + } return true; } -bool TController::DoOnStartCompaction(std::shared_ptr& changes) { - if (auto compaction = dynamic_pointer_cast(changes)) { - Compactions.Inc(); +void TController::DoOnAfterGCAction(const ::NKikimr::NColumnShard::TColumnShard& /*shard*/, const NOlap::IBlobsGCAction& action) { + TGuard g(Mutex); + for (auto d = action.GetBlobsToRemove().GetDirect().GetIterator(); d.IsValid(); 
++d) { + AFL_VERIFY(RemovedBlobIds[action.GetStorageId()][d.GetBlobId()].emplace(d.GetTabletId()).second); + } +// if (SharingIds.empty()) { +// CheckInvariants(); +// } +} + +void TController::CheckInvariants(const ::NKikimr::NColumnShard::TColumnShard& shard, TCheckContext& context) const { + if (!shard.HasIndex()) { + return; + } + const auto& index = shard.GetIndexAs(); + std::vector> granules = index.GetTables({}, {}); + THashMap> ids; + for (auto&& i : granules) { + for (auto&& p : i->GetPortions()) { + p.second->FillBlobIdsByStorage(ids, index.GetVersionedIndex()); + } + } + for (auto&& i : ids) { + auto it = RemovedBlobIds.find(i.first); + if (it == RemovedBlobIds.end()) { + continue; + } + for (auto&& b : i.second) { + auto itB = it->second.find(b); + if (itB != it->second.end()) { + AFL_VERIFY(!itB->second.contains((NOlap::TTabletId)shard.TabletID())); + } + } + } + THashMap shardBlobsCategories = shard.GetStoragesManager()->GetSharedBlobsManager()->GetBlobCategories(); + for (auto&& i : shardBlobsCategories) { + auto manager = shard.GetStoragesManager()->GetOperatorVerified(i.first); + const NOlap::TTabletsByBlob blobs = manager->GetBlobsToDelete(); + for (auto b = blobs.GetIterator(); b.IsValid(); ++b) { + i.second.RemoveSharing(b.GetTabletId(), b.GetBlobId()); + } + for (auto b = blobs.GetIterator(); b.IsValid(); ++b) { + i.second.RemoveBorrowed(b.GetTabletId(), b.GetBlobId()); + } + } + context.AddCategories(shard.TabletID(), std::move(shardBlobsCategories)); +} + +TController::TCheckContext TController::CheckInvariants() const { + TGuard g(Mutex); + TCheckContext context; + if (ExpectedShardsCount && *ExpectedShardsCount != ShardActuals.size()) { + return context; + } + for (auto&& i : ShardActuals) { + CheckInvariants(*i.second, context); + } + Cerr << context.DebugString() << Endl; + context.Check(); + return context; +} + +void TController::DoOnTabletInitCompleted(const ::NKikimr::NColumnShard::TColumnShard& shard) { + TGuard g(Mutex); + 
AFL_VERIFY(ShardActuals.emplace(shard.TabletID(), &shard).second); +} + +void TController::DoOnTabletStopped(const ::NKikimr::NColumnShard::TColumnShard& shard) { + TGuard g(Mutex); + AFL_VERIFY(ShardActuals.erase(shard.TabletID())); +} + +std::vector TController::GetPathIds(const ui64 tabletId) const { + TGuard g(Mutex); + std::vector result; + for (auto&& i : ShardActuals) { + if (i.first == tabletId) { + const auto& index = i.second->GetIndexAs(); + std::vector> granules = index.GetTables({}, {}); + + for (auto&& g : granules) { + result.emplace_back(g->GetPathId()); + } + break; + } + } + return result; +} + +bool TController::IsTrivialLinks() const { + TGuard g(Mutex); + for (auto&& i : ShardActuals) { + if (!i.second->GetStoragesManager()->GetSharedBlobsManager()->IsTrivialLinks()) { + return false; + } + if (i.second->GetStoragesManager()->HasBlobsToDelete()) { + return false; + } } return true; } +::NKikimr::NColumnShard::TBlobPutResult::TPtr TController::OverrideBlobPutResultOnCompaction(const ::NKikimr::NColumnShard::TBlobPutResult::TPtr original, const NOlap::TWriteActionsCollection& actions) const { + if (IndexWriteControllerEnabled) { + return original; + } + bool found = false; + for (auto&& i : actions) { + if (i.first != NOlap::NBlobOperations::TGlobal::DefaultStorageId) { + found = true; + break; + } + } + if (!found) { + return original; + } + IndexWriteControllerBrokeCount.Inc(); + ::NKikimr::NColumnShard::TBlobPutResult::TPtr result = std::make_shared<::NKikimr::NColumnShard::TBlobPutResult>(*original); + result->SetPutStatus(NKikimrProto::EReplyStatus::ERROR); + return result; +} + } diff --git a/ydb/core/tx/columnshard/hooks/testing/controller.h b/ydb/core/tx/columnshard/hooks/testing/controller.h index 7fc824508e91..c4a82b55d494 100644 --- a/ydb/core/tx/columnshard/hooks/testing/controller.h +++ b/ydb/core/tx/columnshard/hooks/testing/controller.h @@ -1,26 +1,173 @@ #pragma once +#include +#include +#include +#include #include +#include 
namespace NKikimr::NYDBTest::NColumnShard { class TController: public ICSController { private: + YDB_READONLY(TAtomicCounter, TTLFinishedCounter, 0); + YDB_READONLY(TAtomicCounter, TTLStartedCounter, 0); + YDB_READONLY(TAtomicCounter, InsertFinishedCounter, 0); + YDB_READONLY(TAtomicCounter, InsertStartedCounter, 0); + YDB_READONLY(TAtomicCounter, CompactionFinishedCounter, 0); + YDB_READONLY(TAtomicCounter, CompactionStartedCounter, 0); + YDB_READONLY(TAtomicCounter, FilteredRecordsCount, 0); - YDB_READONLY(TAtomicCounter, Compactions, 0); - YDB_READONLY(TAtomicCounter, Indexations, 0); YDB_READONLY(TAtomicCounter, IndexesSkippingOnSelect, 0); YDB_READONLY(TAtomicCounter, IndexesApprovedOnSelect, 0); YDB_READONLY(TAtomicCounter, IndexesSkippedNoData, 0); + YDB_READONLY(TAtomicCounter, TieringUpdates, 0); + YDB_READONLY(TAtomicCounter, NeedActualizationCount, 0); + + YDB_READONLY(TAtomicCounter, ActualizationsCount, 0); + YDB_READONLY(TAtomicCounter, ActualizationRefreshSchemeCount, 0); + YDB_READONLY(TAtomicCounter, ActualizationRefreshTieringCount, 0); + + YDB_ACCESSOR_DEF(std::optional, LagForCompactionBeforeTierings); YDB_ACCESSOR(std::optional, GuaranteeIndexationInterval, TDuration::Zero()); YDB_ACCESSOR(std::optional, PeriodicWakeupActivationPeriod, std::nullopt); YDB_ACCESSOR(std::optional, StatsReportInterval, std::nullopt); YDB_ACCESSOR(std::optional, GuaranteeIndexationStartBytesLimit, 0); YDB_ACCESSOR(std::optional, OptimizerFreshnessCheckDuration, TDuration::Zero()); EOptimizerCompactionWeightControl CompactionControl = EOptimizerCompactionWeightControl::Force; + + YDB_ACCESSOR(std::optional, OverrideReduceMemoryIntervalLimit, 1024); + YDB_ACCESSOR_DEF(std::optional, OverrideRejectMemoryIntervalLimit); + + std::optional ReadTimeoutClean; + std::optional ExpectedShardsCount; + + THashMap ShardActuals; + THashMap>> RemovedBlobIds; + TMutex Mutex; + + YDB_ACCESSOR(bool, IndexWriteControllerEnabled, true); + mutable TAtomicCounter 
IndexWriteControllerBrokeCount; + std::set DisabledBackgrounds; + + TMutex ActiveTabletsMutex; + std::set ActiveTablets; + + class TBlobInfo { + private: + const NOlap::TUnifiedBlobId BlobId; + std::optional OwnerTabletId; + THashSet SharedTabletIdsFromShared; + THashSet SharedTabletIdsFromOwner; + public: + TBlobInfo(const NOlap::TUnifiedBlobId& blobId) + : BlobId(blobId) + { + + } + void AddOwner(const ui64 tabletId) { + if (!OwnerTabletId) { + OwnerTabletId = tabletId; + } else { + AFL_VERIFY(*OwnerTabletId == tabletId); + } + } + + void AddSharingFromOwner(const ui64 tabletId) { + SharedTabletIdsFromOwner.emplace(tabletId); + } + void AddSharingFromShared(const ui64 tabletId) { + SharedTabletIdsFromShared.emplace(tabletId); + } + void Check() const { + AFL_VERIFY(OwnerTabletId); + AFL_VERIFY(SharedTabletIdsFromShared == SharedTabletIdsFromOwner)("blob_id", BlobId.ToStringNew())("shared", JoinSeq(",", SharedTabletIdsFromShared))("owned", JoinSeq(",", SharedTabletIdsFromOwner)); + } + + void DebugString(const TString& delta, TStringBuilder& sb) const { + if (OwnerTabletId) { + sb << delta << "O: " << *OwnerTabletId << Endl; + } + if (SharedTabletIdsFromShared.size()) { + sb << delta << "S: " << JoinSeq(",", SharedTabletIdsFromShared) << Endl; + } + } + }; + + class TCheckContext { + private: + THashMap> Infos; + public: + void Check() const { + for (auto&& i : Infos) { + for (auto&& b : i.second) { + b.second.Check(); + } + } + } + + TString DebugString() const { + TStringBuilder sb; + for (auto&& i : Infos) { + sb << i.first << Endl; + for (auto&& b : i.second) { + sb << " " << b.first << Endl; + b.second.DebugString(" ", sb); + } + } + return sb; + } + + void AddCategories(const ui64 tabletId, THashMap&& categories) { + for (auto&& s : categories) { + for (auto it = s.second.GetDirect().GetIterator(); it.IsValid(); ++it) { + Infos[s.first].emplace(it.GetBlobId(), it.GetBlobId()).first->second.AddOwner((ui64)it.GetTabletId()); + } + for (auto it = 
s.second.GetBorrowed().GetIterator(); it.IsValid(); ++it) { + Infos[s.first].emplace(it.GetBlobId(), it.GetBlobId()).first->second.AddOwner((ui64)it.GetTabletId()); + Infos[s.first].emplace(it.GetBlobId(), it.GetBlobId()).first->second.AddSharingFromShared((ui64)tabletId); + } + for (auto it = s.second.GetSharing().GetIterator(); it.IsValid(); ++it) { + Infos[s.first].emplace(it.GetBlobId(), it.GetBlobId()).first->second.AddOwner(tabletId); + Infos[s.first].emplace(it.GetBlobId(), it.GetBlobId()).first->second.AddSharingFromOwner((ui64)it.GetTabletId()); + if (it.GetTabletId() == (NOlap::TTabletId)tabletId) { + Infos[s.first].emplace(it.GetBlobId(), it.GetBlobId()).first->second.AddSharingFromShared((ui64)tabletId); + } + } + } + } + }; + + void CheckInvariants(const ::NKikimr::NColumnShard::TColumnShard& shard, TCheckContext& context) const; + + THashSet SharingIds; protected: + virtual ::NKikimr::NColumnShard::TBlobPutResult::TPtr OverrideBlobPutResultOnCompaction(const ::NKikimr::NColumnShard::TBlobPutResult::TPtr original, const NOlap::TWriteActionsCollection& actions) const override; + virtual TDuration GetLagForCompactionBeforeTierings(const TDuration def) const override { + return LagForCompactionBeforeTierings.value_or(def); + } + + virtual bool IsBackgroundEnabled(const EBackground id) const override { + TGuard g(Mutex); + return !DisabledBackgrounds.contains(id); + } + + virtual void OnPortionActualization(const NOlap::TPortionInfo& /*info*/) override { + ActualizationsCount.Inc(); + } + virtual void OnActualizationRefreshScheme() override { + ActualizationRefreshSchemeCount.Inc(); + } + virtual void OnActualizationRefreshTiering() override { + ActualizationRefreshTieringCount.Inc(); + } + virtual void DoOnTabletInitCompleted(const ::NKikimr::NColumnShard::TColumnShard& shard) override; + virtual void DoOnTabletStopped(const ::NKikimr::NColumnShard::TColumnShard& shard) override; + virtual void DoOnAfterGCAction(const 
::NKikimr::NColumnShard::TColumnShard& shard, const NOlap::IBlobsGCAction& action) override; + + virtual bool DoOnWriteIndexStart(const ui64 tabletId, NOlap::TColumnEngineChanges& change) override; virtual bool DoOnAfterFilterAssembling(const std::shared_ptr& batch) override; - virtual bool DoOnStartCompaction(std::shared_ptr& changes) override; - virtual bool DoOnWriteIndexComplete(const ui64 /*tabletId*/, const TString& /*changeClassName*/) override; + virtual bool DoOnWriteIndexComplete(const NOlap::TColumnEngineChanges& changes, const ::NKikimr::NColumnShard::TColumnShard& shard) override; virtual TDuration GetGuaranteeIndexationInterval(const TDuration defaultValue) const override { return GuaranteeIndexationInterval.value_or(defaultValue); } @@ -36,11 +183,83 @@ class TController: public ICSController { virtual TDuration GetOptimizerFreshnessCheckDuration(const TDuration defaultValue) const override { return OptimizerFreshnessCheckDuration.value_or(defaultValue); } + virtual TDuration GetReadTimeoutClean(const TDuration def) override { + return ReadTimeoutClean.value_or(def); + } virtual EOptimizerCompactionWeightControl GetCompactionControl() const override { return CompactionControl; } + void OnTieringModified(const std::shared_ptr& /*tiers*/) override { + TieringUpdates.Inc(); + } + + virtual void DoOnDataSharingFinished(const ui64 /*tabletId*/, const TString& sessionId) override { + TGuard g(Mutex); + AFL_VERIFY(SharingIds.erase(sessionId)); + if (SharingIds.empty()) { + CheckInvariants(); + } + } + virtual void DoOnDataSharingStarted(const ui64 /*tabletId*/, const TString& sessionId) override { + TGuard g(Mutex); + if (SharingIds.empty()) { + CheckInvariants(); + } + SharingIds.emplace(sessionId); + } public: + const TAtomicCounter& GetIndexWriteControllerBrokeCount() const { + return IndexWriteControllerBrokeCount; + } + virtual ui64 GetReduceMemoryIntervalLimit(const ui64 def) const override { + return OverrideReduceMemoryIntervalLimit.value_or(def); + 
} + virtual ui64 GetRejectMemoryIntervalLimit(const ui64 def) const override { + return OverrideRejectMemoryIntervalLimit.value_or(def); + } + bool IsTrivialLinks() const; + TCheckContext CheckInvariants() const; + + ui32 GetShardActualsCount() const { + TGuard g(Mutex); + return ShardActuals.size(); + } + + virtual void AddPortionForActualizer(const i32 portionsCount) override { + NeedActualizationCount.Add(portionsCount); + } + + void DisableBackground(const EBackground id) { + TGuard g(Mutex); + DisabledBackgrounds.emplace(id); + } + + void EnableBackground(const EBackground id) { + TGuard g(Mutex); + DisabledBackgrounds.erase(id); + } + + void WaitActualization(const TDuration d) const { + const TInstant start = TInstant::Now(); + while (TInstant::Now() - start < d && NeedActualizationCount.Val()) { + Cerr << "waiting actualization: " << NeedActualizationCount.Val() << "/" << TInstant::Now() - start << Endl; + Sleep(TDuration::Seconds(1)); + } + AFL_VERIFY(!NeedActualizationCount.Val()); + } + + std::vector GetShardActualIds() const { + TGuard g(Mutex); + std::vector result; + for (auto&& i : ShardActuals) { + result.emplace_back(i.first); + } + return result; + } + + std::vector GetPathIds(const ui64 tabletId) const; + virtual void OnIndexSelectProcessed(const std::optional result) override { if (!result) { IndexesSkippedNoData.Inc(); @@ -50,13 +269,36 @@ class TController: public ICSController { IndexesSkippingOnSelect.Inc(); } } + void SetExpectedShardsCount(const ui32 value) { + ExpectedShardsCount = value; + } void SetCompactionControl(const EOptimizerCompactionWeightControl value) { CompactionControl = value; } + void SetReadTimeoutClean(const TDuration d) { + ReadTimeoutClean = d; + } bool HasPKSortingOnly() const; - bool HasCompactions() const { - return Compactions.Val(); + + void OnSwitchToWork(const ui64 tabletId) override { + TGuard g(ActiveTabletsMutex); + ActiveTablets.emplace(tabletId); + } + + void OnCleanupActors(const ui64 tabletId) override { 
+ TGuard g(ActiveTabletsMutex); + ActiveTablets.erase(tabletId); + } + + ui64 GetActiveTabletsCount() const { + TGuard g(ActiveTabletsMutex); + return ActiveTablets.size(); + } + + bool IsActiveTablet(const ui64 tabletId) const { + TGuard g(ActiveTabletsMutex); + return ActiveTablets.contains(tabletId); } }; diff --git a/ydb/core/tx/columnshard/inflight_request_tracker.cpp b/ydb/core/tx/columnshard/inflight_request_tracker.cpp new file mode 100644 index 000000000000..98ca6d7ab6da --- /dev/null +++ b/ydb/core/tx/columnshard/inflight_request_tracker.cpp @@ -0,0 +1,90 @@ +#include "inflight_request_tracker.h" +#include "engines/column_engine.h" +#include "engines/reader/plain_reader/constructor/read_metadata.h" + +namespace NKikimr::NColumnShard { + +void TInFlightReadsTracker::RemoveInFlightRequest(ui64 cookie, const NOlap::TVersionedIndex* index) { + Y_ABORT_UNLESS(RequestsMeta.contains(cookie), "Unknown request cookie %" PRIu64, cookie); + const auto& readMetaList = RequestsMeta[cookie]; + + for (const auto& readMetaBase : readMetaList) { + NOlap::NReader::NPlain::TReadMetadata::TConstPtr readMeta = std::dynamic_pointer_cast(readMetaBase); + + if (!readMeta) { + continue; + } + + THashMap> portionBlobIds; + for (const auto& portion : readMeta->SelectInfo->PortionsOrderedPK) { + const ui64 portionId = portion->GetPortion(); + AFL_VERIFY(index); + portion->FillBlobIdsByStorage(portionBlobIds, *index); + auto it = PortionUseCount.find(portionId); + Y_ABORT_UNLESS(it != PortionUseCount.end(), "Portion id %" PRIu64 " not found in request %" PRIu64, portionId, cookie); + if (it->second == 1) { + PortionUseCount.erase(it); + } else { + it->second--; + } + } + + for (auto&& i : portionBlobIds) { + auto storage = StoragesManager->GetOperatorVerified(i.first); + auto tracker = storage->GetBlobsTracker(); + for (auto& blobId : i.second) { + tracker->FreeBlob(blobId); + } + } + + auto insertStorage = StoragesManager->GetInsertOperator(); + auto tracker = 
insertStorage->GetBlobsTracker(); + for (const auto& committedBlob : readMeta->CommittedBlobs) { + tracker->FreeBlob(committedBlob.GetBlobRange().GetBlobId()); + } + } + + RequestsMeta.erase(cookie); +} + +TConclusionStatus TInFlightReadsTracker::AddToInFlightRequest(const ui64 cookie, NOlap::NReader::TReadMetadataBase::TConstPtr readMetaBase, const NOlap::TVersionedIndex* index) { + RequestsMeta[cookie].push_back(readMetaBase); + + auto readMeta = std::dynamic_pointer_cast(readMetaBase); + + if (!readMeta) { + return TConclusionStatus::Success(); + } + + auto selectInfo = readMeta->SelectInfo; + Y_ABORT_UNLESS(selectInfo); + SelectStatsDelta += selectInfo->Stats(); + + THashMap> portionBlobIds; + for (const auto& portion : readMeta->SelectInfo->PortionsOrderedPK) { + const ui64 portionId = portion->GetPortion(); + PortionUseCount[portionId]++; + AFL_VERIFY(index); + portion->FillBlobIdsByStorage(portionBlobIds, *index); + } + + for (auto&& i : portionBlobIds) { + auto storage = StoragesManager->GetOperatorOptional(i.first); + if (!storage) { + return TConclusionStatus::Fail("blobs storage info not ready for '" + i.first + "'"); + } + auto tracker = storage->GetBlobsTracker(); + for (auto& blobId : i.second) { + tracker->UseBlob(blobId); + } + } + + auto insertStorage = StoragesManager->GetInsertOperator(); + auto tracker = insertStorage->GetBlobsTracker(); + for (const auto& committedBlob : readMeta->CommittedBlobs) { + tracker->UseBlob(committedBlob.GetBlobRange().GetBlobId()); + } + return TConclusionStatus::Success(); +} + +} diff --git a/ydb/core/tx/columnshard/inflight_request_tracker.h b/ydb/core/tx/columnshard/inflight_request_tracker.h index 8b6c35bcac9a..d530c11d7a4f 100644 --- a/ydb/core/tx/columnshard/inflight_request_tracker.h +++ b/ydb/core/tx/columnshard/inflight_request_tracker.h @@ -1,67 +1,29 @@ #pragma once #include "blob.h" -#include +#include + +namespace NKikimr::NOlap { +class TVersionedIndex; +} namespace NKikimr::NColumnShard { -using 
NOlap::TReadMetadata; using NOlap::IBlobInUseTracker; class TInFlightReadsTracker { public: // Returns a unique cookie associated with this request - ui64 AddInFlightRequest(NOlap::TReadMetadataBase::TConstPtr readMeta) { - const ui64 cookie = NextCookie++; - AddToInFlightRequest(cookie, readMeta); - return cookie; - } - - // Returns a unique cookie associated with this request - template - ui64 AddInFlightRequest(const TReadMetadataList& readMetaList) { + [[nodiscard]] TConclusion AddInFlightRequest(NOlap::NReader::TReadMetadataBase::TConstPtr readMeta, const NOlap::TVersionedIndex* index) { const ui64 cookie = NextCookie++; - for (const auto& readMetaPtr : readMetaList) { - AddToInFlightRequest(cookie, readMetaPtr); + auto status = AddToInFlightRequest(cookie, readMeta, index); + if (!status) { + return status; } return cookie; } - void RemoveInFlightRequest(ui64 cookie) { - Y_ABORT_UNLESS(RequestsMeta.contains(cookie), "Unknown request cookie %" PRIu64, cookie); - const auto& readMetaList = RequestsMeta[cookie]; - - for (const auto& readMetaBase : readMetaList) { - NOlap::TReadMetadata::TConstPtr readMeta = std::dynamic_pointer_cast(readMetaBase); - - if (!readMeta) { - continue; - } - - for (const auto& portion : readMeta->SelectInfo->PortionsOrderedPK) { - const ui64 portionId = portion->GetPortion(); - auto it = PortionUseCount.find(portionId); - Y_ABORT_UNLESS(it != PortionUseCount.end(), "Portion id %" PRIu64 " not found in request %" PRIu64, portionId, cookie); - if (it->second == 1) { - PortionUseCount.erase(it); - } else { - it->second--; - } - auto tracker = portion->GetBlobsStorage()->GetBlobsTracker(); - for (auto& rec : portion->Records) { - tracker->FreeBlob(rec.BlobRange.BlobId); - } - } - - auto insertStorage = StoragesManager->GetInsertOperator(); - auto tracker = insertStorage->GetBlobsTracker(); - for (const auto& committedBlob : readMeta->CommittedBlobs) { - tracker->FreeBlob(committedBlob.GetBlobRange().GetBlobId()); - } - } - - 
RequestsMeta.erase(cookie); - } + void RemoveInFlightRequest(ui64 cookie, const NOlap::TVersionedIndex* index); // Checks if the portion is in use by any in-flight request bool IsPortionUsed(ui64 portionId) const { @@ -81,39 +43,12 @@ class TInFlightReadsTracker { } private: - void AddToInFlightRequest(const ui64 cookie, NOlap::TReadMetadataBase::TConstPtr readMetaBase) { - RequestsMeta[cookie].push_back(readMetaBase); - - NOlap::TReadMetadata::TConstPtr readMeta = std::dynamic_pointer_cast(readMetaBase); - - if (!readMeta) { - return; - } - - auto selectInfo = readMeta->SelectInfo; - Y_ABORT_UNLESS(selectInfo); - SelectStatsDelta += selectInfo->Stats(); - - for (const auto& portion : readMeta->SelectInfo->PortionsOrderedPK) { - const ui64 portionId = portion->GetPortion(); - PortionUseCount[portionId]++; - auto tracker = portion->GetBlobsStorage()->GetBlobsTracker(); - for (auto& rec : portion->Records) { - tracker->UseBlob(rec.BlobRange.BlobId); - } - } - - auto insertStorage = StoragesManager->GetInsertOperator(); - auto tracker = insertStorage->GetBlobsTracker(); - for (const auto& committedBlob : readMeta->CommittedBlobs) { - tracker->UseBlob(committedBlob.GetBlobRange().GetBlobId()); - } - } + [[nodiscard]] TConclusionStatus AddToInFlightRequest(const ui64 cookie, NOlap::NReader::TReadMetadataBase::TConstPtr readMetaBase, const NOlap::TVersionedIndex* index); private: std::shared_ptr StoragesManager; ui64 NextCookie{1}; - THashMap> RequestsMeta; + THashMap> RequestsMeta; THashMap PortionUseCount; NOlap::TSelectInfo::TStats SelectStatsDelta; }; diff --git a/ydb/core/tx/columnshard/normalizer/abstract/abstract.cpp b/ydb/core/tx/columnshard/normalizer/abstract/abstract.cpp index 470d01bb3d4c..99de629aeb39 100644 --- a/ydb/core/tx/columnshard/normalizer/abstract/abstract.cpp +++ b/ydb/core/tx/columnshard/normalizer/abstract/abstract.cpp @@ -1,13 +1,17 @@ #include "abstract.h" +#include +#include + namespace NKikimr::NOlap { void 
TNormalizationController::RegisterNormalizer(INormalizerComponent::TPtr normalizer) { + AFL_VERIFY(normalizer); Counters.emplace_back(normalizer->GetName()); Normalizers.push_back(normalizer); } - const INormalizerComponent::TPtr& TNormalizationController::GetNormalizer() const { + const TNormalizationController::INormalizerComponent::TPtr& TNormalizationController::GetNormalizer() const { Y_ABORT_UNLESS(CurrentNormalizerIndex < Normalizers.size()); return Normalizers[CurrentNormalizerIndex]; } @@ -17,7 +21,7 @@ namespace NKikimr::NOlap { return Counters[CurrentNormalizerIndex]; } - bool TNormalizationController::IsNormalizationFinished() const { + bool TNormalizationController::TNormalizationController::IsNormalizationFinished() const { return CurrentNormalizerIndex >= Normalizers.size(); } @@ -25,9 +29,38 @@ namespace NKikimr::NOlap { if (IsNormalizationFinished()) { return false; } - Y_ABORT_UNLESS(!GetNormalizer()->WaitResult()); + Y_ABORT_UNLESS(!GetNormalizer()->HasActiveTasks()); GetCounters().OnNormalizerFinish(); ++CurrentNormalizerIndex; return !IsNormalizationFinished(); } + + void TTrivialNormalizerTask::Start(const TNormalizationController& /* controller */, const TNormalizationContext& nCtx) { + TActorContext::AsActorContext().Send(nCtx.GetShardActor(), std::make_unique(Changes)); + } + + void TNormalizationController::UpdateControllerState(NIceDb::TNiceDb& db) { + NColumnShard::Schema::SaveSpecialValue(db, NColumnShard::Schema::EValueIds::LastNormalizerSequentialId, GetNormalizer()->GetSequentialId()); + } + + void TNormalizationController::InitNormalizers(const TInitContext& ctx) { + auto normalizers = GetEnumAllValues(); + auto lastRegisteredNormalizer = ENormalizerSequentialId::Granules; + for (auto nType : normalizers) { + RegisterNormalizer(std::shared_ptr(INormalizerComponent::TFactory::Construct(nType, ctx))); + AFL_VERIFY(lastRegisteredNormalizer <= nType)("current", ToString(nType))("last", ToString(lastRegisteredNormalizer)); + 
lastRegisteredNormalizer = nType; + } + } + + void TNormalizationController::InitControllerState(NIceDb::TNiceDb& db) { + ui64 lastNormalizerId; + if (NColumnShard::Schema::GetSpecialValue(db, NColumnShard::Schema::EValueIds::LastNormalizerSequentialId, lastNormalizerId)) { + // We want to rerun all normalizers in case of binary rollback + if (lastNormalizerId <= GetLastNormalizerSequentialId()) { + LastAppliedNormalizerId = lastNormalizerId; + } + } + } + } diff --git a/ydb/core/tx/columnshard/normalizer/abstract/abstract.h b/ydb/core/tx/columnshard/normalizer/abstract/abstract.h index fadda5ce13ac..dad21751eb65 100644 --- a/ydb/core/tx/columnshard/normalizer/abstract/abstract.h +++ b/ydb/core/tx/columnshard/normalizer/abstract/abstract.h @@ -5,8 +5,12 @@ #include #include - #include +#include + +namespace NKikimr::NIceDb { + class TNiceDb; +} namespace NKikimr::NOlap { @@ -46,9 +50,17 @@ namespace NKikimr::NOlap { } }; + enum class ENormalizerSequentialId : ui32 { + Granules = 1, + Chunks, + PortionsCleaner, + TablesCleaner, + // PortionsMetadata + }; + class TNormalizationContext { YDB_ACCESSOR_DEF(TActorId, ResourceSubscribeActor); - YDB_ACCESSOR_DEF(TActorId, ColumnshardActor); + YDB_ACCESSOR_DEF(TActorId, ShardActor); std::shared_ptr ResourcesGuard; public: void SetResourcesGuard(std::shared_ptr rg) { @@ -56,6 +68,7 @@ namespace NKikimr::NOlap { } }; + class TNormalizationController; class INormalizerTask { @@ -71,37 +84,100 @@ namespace NKikimr::NOlap { using TPtr = std::shared_ptr; virtual ~INormalizerChanges() {} - virtual bool Apply(NTabletFlatExecutor::TTransactionContext& txc, const TNormalizationController& normalizationContext) const = 0; + virtual bool ApplyOnExecute(NTabletFlatExecutor::TTransactionContext& txc, const TNormalizationController& normalizationContext) const = 0; + virtual void ApplyOnComplete(const TNormalizationController& normalizationContext) const { + Y_UNUSED(normalizationContext); + } + + virtual ui64 GetSize() const = 0; }; - 
class INormalizerComponent { + class TTrivialNormalizerTask : public INormalizerTask { + INormalizerChanges::TPtr Changes; public: - using TPtr = std::shared_ptr; - - virtual ~INormalizerComponent() {} - - bool WaitResult() const { - return AtomicGet(ActiveTasksCount) > 0; - } - - void OnResultReady() { - AFL_VERIFY(ActiveTasksCount > 0); - AtomicDecrement(ActiveTasksCount); + TTrivialNormalizerTask(const INormalizerChanges::TPtr& changes) + : Changes(changes) + { + AFL_VERIFY(Changes); } - virtual const TString& GetName() const = 0; - virtual TConclusion> Init(const TNormalizationController& controller, NTabletFlatExecutor::TTransactionContext& txc) = 0; - protected: - TAtomic ActiveTasksCount = 0; + void Start(const TNormalizationController& /* controller */, const TNormalizationContext& /*nCtx*/) override; }; class TNormalizationController { + public: + class TInitContext { + TIntrusiveConstPtr StorageInfo; + public: + TInitContext(TTabletStorageInfo* info) + : StorageInfo(info) + {} + + TIntrusiveConstPtr GetStorageInfo() const { + return StorageInfo; + } + }; + + class INormalizerComponent { + public: + using TPtr = std::shared_ptr; + using TFactory = NObjectFactory::TParametrizedObjectFactory; + + virtual ~INormalizerComponent() {} + + bool HasActiveTasks() const { + return AtomicGet(ActiveTasksCount) > 0; + } + + void OnResultReady() { + AFL_VERIFY(ActiveTasksCount > 0); + AtomicDecrement(ActiveTasksCount); + } + + i64 GetActiveTasksCount() const { + return AtomicGet(ActiveTasksCount); + } + + virtual ENormalizerSequentialId GetType() const = 0; + + TString GetName() const { + return ToString(GetType()); + } + + ui32 GetSequentialId() const { + return (ui32) GetType(); + } + + TConclusion> Init(const TNormalizationController& controller, NTabletFlatExecutor::TTransactionContext& txc) { + if (controller.HasLastAppliedNormalizerId() && controller.GetLastAppliedNormalizerIdUnsafe() >= GetSequentialId()) { + return std::vector(); + } + auto result = 
DoInit(controller, txc); + if (!result.IsSuccess()) { + return result; + } + AtomicSet(ActiveTasksCount, result.GetResult().size()); + return result; + } + + private: + virtual TConclusion> DoInit(const TNormalizationController& controller, NTabletFlatExecutor::TTransactionContext& txc) = 0; + + TAtomic ActiveTasksCount = 0; + }; + using TPtr = std::shared_ptr; + + private: std::shared_ptr StoragesManager; NOlap::NResourceBroker::NSubscribe::TTaskContext TaskSubscription; - std::vector Normalizers; + std::vector Normalizers; ui64 CurrentNormalizerIndex = 0; std::vector Counters; + YDB_READONLY_OPT(ui32, LastAppliedNormalizerId); + + private: + void RegisterNormalizer(INormalizerComponent::TPtr normalizer); public: TNormalizationController(std::shared_ptr storagesManager, const std::shared_ptr& counters) @@ -112,7 +188,14 @@ namespace NKikimr::NOlap { return TaskSubscription; } - void RegisterNormalizer(INormalizerComponent::TPtr normalizer); + void InitNormalizers(const TInitContext& ctx); + void UpdateControllerState(NIceDb::TNiceDb& db); + void InitControllerState(NIceDb::TNiceDb& db); + + ui32 GetLastNormalizerSequentialId() { + AFL_VERIFY(!Normalizers.empty()); + return Normalizers.back()->GetSequentialId(); + } std::shared_ptr GetStoragesManager() const { AFL_VERIFY(!!StoragesManager); diff --git a/ydb/core/tx/columnshard/normalizer/abstract/ya.make b/ydb/core/tx/columnshard/normalizer/abstract/ya.make index 48572e5fe02f..5ba07d4d2395 100644 --- a/ydb/core/tx/columnshard/normalizer/abstract/ya.make +++ b/ydb/core/tx/columnshard/normalizer/abstract/ya.make @@ -4,6 +4,8 @@ SRCS( abstract.cpp ) +GENERATE_ENUM_SERIALIZATION(abstract.h) + PEERDIR( ydb/core/tablet_flat ydb/core/tx/columnshard/blobs_action/abstract diff --git a/ydb/core/tx/columnshard/normalizer/granule/normalizer.cpp b/ydb/core/tx/columnshard/normalizer/granule/normalizer.cpp index 5eeaba2ae5af..081b40f2379e 100644 --- a/ydb/core/tx/columnshard/normalizer/granule/normalizer.cpp +++ 
b/ydb/core/tx/columnshard/normalizer/granule/normalizer.cpp @@ -31,7 +31,7 @@ class TGranulesNormalizer::TNormalizerResult : public INormalizerChanges { {} public: - bool Apply(NTabletFlatExecutor::TTransactionContext& txc, const TNormalizationController& /* normController */) const override { + bool ApplyOnExecute(NTabletFlatExecutor::TTransactionContext& txc, const TNormalizationController& /* normController */) const override { using namespace NColumnShard; NIceDb::TNiceDb db(txc.DB); ACFL_INFO("normalizer", "TGranulesNormalizer")("message", TStringBuilder() << "apply " << Chunks.size() << " chunks"); @@ -48,6 +48,10 @@ class TGranulesNormalizer::TNormalizerResult : public INormalizerChanges { return true; } + ui64 GetSize() const override { + return Chunks.size(); + } + static std::optional> Init(const TNormalizationController& controller, NTabletFlatExecutor::TTransactionContext& txc) { using namespace NColumnShard; NIceDb::TNiceDb db(txc.DB); @@ -126,28 +130,15 @@ class TGranulesNormalizer::TNormalizerResult : public INormalizerChanges { }; -class TGranulesNormalizerTask : public INormalizerTask { - INormalizerChanges::TPtr Changes; -public: - TGranulesNormalizerTask(const INormalizerChanges::TPtr& changes) - : Changes(changes) - {} - - void Start(const TNormalizationController& /* controller */, const TNormalizationContext& nCtx) override { - TActorContext::AsActorContext().Send(nCtx.GetColumnshardActor(), std::make_unique(Changes)); - } -}; - -TConclusion> TGranulesNormalizer::Init(const TNormalizationController& controller, NTabletFlatExecutor::TTransactionContext& txc) { +TConclusion> TGranulesNormalizer::DoInit(const TNormalizationController& controller, NTabletFlatExecutor::TTransactionContext& txc) { auto changes = TNormalizerResult::Init(controller, txc); if (!changes) { return TConclusionStatus::Fail("Not ready");; } std::vector tasks; for (auto&& c : *changes) { - tasks.emplace_back(std::make_shared(c)); + tasks.emplace_back(std::make_shared(c)); } 
- AtomicSet(ActiveTasksCount, tasks.size()); return tasks; } diff --git a/ydb/core/tx/columnshard/normalizer/granule/normalizer.h b/ydb/core/tx/columnshard/normalizer/granule/normalizer.h index a3c78e4ba8e5..063f2298519c 100644 --- a/ydb/core/tx/columnshard/normalizer/granule/normalizer.h +++ b/ydb/core/tx/columnshard/normalizer/granule/normalizer.h @@ -6,15 +6,19 @@ namespace NKikimr::NOlap { -class TGranulesNormalizer: public NOlap::INormalizerComponent { +class TGranulesNormalizer: public TNormalizationController::INormalizerComponent { class TNormalizerResult; + + static inline INormalizerComponent::TFactory::TRegistrator Registrator = INormalizerComponent::TFactory::TRegistrator(ENormalizerSequentialId::Granules); public: - virtual const TString& GetName() const override { - const static TString name = "TGranulesNormalizer"; - return name; + TGranulesNormalizer(const TNormalizationController::TInitContext&) { + } + + virtual ENormalizerSequentialId GetType() const override { + return ENormalizerSequentialId::Granules; } - virtual TConclusion> Init(const TNormalizationController& controller, NTabletFlatExecutor::TTransactionContext& txc) override; + virtual TConclusion> DoInit(const TNormalizationController& controller, NTabletFlatExecutor::TTransactionContext& txc) override; }; } diff --git a/ydb/core/tx/columnshard/normalizer/granule/ya.make b/ydb/core/tx/columnshard/normalizer/granule/ya.make index 8a8c7e5ab0aa..d44051621eb3 100644 --- a/ydb/core/tx/columnshard/normalizer/granule/ya.make +++ b/ydb/core/tx/columnshard/normalizer/granule/ya.make @@ -1,7 +1,7 @@ LIBRARY() SRCS( - normalizer.cpp + GLOBAL normalizer.cpp ) PEERDIR( diff --git a/ydb/core/tx/columnshard/normalizer/portion/chunks.cpp b/ydb/core/tx/columnshard/normalizer/portion/chunks.cpp index 81525fc0f905..5fb16edcc0d1 100644 --- a/ydb/core/tx/columnshard/normalizer/portion/chunks.cpp +++ b/ydb/core/tx/columnshard/normalizer/portion/chunks.cpp @@ -15,7 +15,7 @@ class 
TChunksNormalizer::TNormalizerResult : public INormalizerChanges { : Chunks(std::move(chunks)) {} - bool Apply(NTabletFlatExecutor::TTransactionContext& txc, const TNormalizationController& /* normController */) const override { + bool ApplyOnExecute(NTabletFlatExecutor::TTransactionContext& txc, const TNormalizationController& /* normController */) const override { using namespace NColumnShard; NIceDb::TNiceDb db(txc.DB); @@ -34,13 +34,17 @@ class TChunksNormalizer::TNormalizerResult : public INormalizerChanges { } return true; } + + ui64 GetSize() const override { + return Chunks.size(); + } }; class TRowsAndBytesChangesTask: public NConveyor::ITask { public: using TDataContainer = std::vector; private: - THashMap Blobs; + NBlobOperations::NRead::TCompositeReadBlobs Blobs; std::vector Chunks; TNormalizationContext NormContext; protected: @@ -48,13 +52,12 @@ class TRowsAndBytesChangesTask: public NConveyor::ITask { for (auto&& chunkInfo : Chunks) { const auto& blobRange = chunkInfo.GetBlobRange(); - auto blobIt = Blobs.find(blobRange); - Y_ABORT_UNLESS(blobIt != Blobs.end()); + auto blobData = Blobs.Extract(IStoragesManager::DefaultStorageId, blobRange); auto columnLoader = chunkInfo.GetLoader(); Y_ABORT_UNLESS(!!columnLoader); - TPortionInfo::TAssembleBlobInfo assembleBlob(blobIt->second); + TPortionInfo::TAssembleBlobInfo assembleBlob(blobData); auto batch = assembleBlob.BuildRecordBatch(*columnLoader); Y_ABORT_UNLESS(!!batch); @@ -63,12 +66,12 @@ class TRowsAndBytesChangesTask: public NConveyor::ITask { } auto changes = std::make_shared(std::move(Chunks)); - TActorContext::AsActorContext().Send(NormContext.GetColumnshardActor(), std::make_unique(changes)); + TActorContext::AsActorContext().Send(NormContext.GetShardActor(), std::make_unique(changes)); return true; } public: - TRowsAndBytesChangesTask(THashMap&& blobs, const TNormalizationContext& nCtx, std::vector&& chunks, std::shared_ptr>) + 
TRowsAndBytesChangesTask(NBlobOperations::NRead::TCompositeReadBlobs&& blobs, const TNormalizationContext& nCtx, std::vector&& chunks, std::shared_ptr>) : Blobs(std::move(blobs)) , Chunks(std::move(chunks)) , NormContext(nCtx) @@ -92,7 +95,7 @@ void TChunksNormalizer::TChunkInfo::InitSchema(const NColumnShard::TTablesManage Schema = tm.GetPrimaryIndexSafe().GetVersionedIndex().GetSchema(NOlap::TSnapshot(Key.GetPlanStep(), Key.GetTxId())); } -TConclusion> TChunksNormalizer::Init(const TNormalizationController& controller, NTabletFlatExecutor::TTransactionContext& txc) { +TConclusion> TChunksNormalizer::DoInit(const TNormalizationController& controller, NTabletFlatExecutor::TTransactionContext& txc) { using namespace NColumnShard; NIceDb::TNiceDb db(txc.DB); @@ -152,7 +155,6 @@ TConclusion> TChunksNormalizer::Init(const TN if (package.size() > 0) { tasks.emplace_back(std::make_shared>(std::move(package))); } - AtomicSet(ActiveTasksCount, tasks.size()); return tasks; } diff --git a/ydb/core/tx/columnshard/normalizer/portion/chunks.h b/ydb/core/tx/columnshard/normalizer/portion/chunks.h index d174b8903862..f272f956208a 100644 --- a/ydb/core/tx/columnshard/normalizer/portion/chunks.h +++ b/ydb/core/tx/columnshard/normalizer/portion/chunks.h @@ -12,7 +12,7 @@ namespace NKikimr::NColumnShard { namespace NKikimr::NOlap { - class TChunksNormalizer : public INormalizerComponent { + class TChunksNormalizer : public TNormalizationController::INormalizerComponent { public: class TNormalizerResult; @@ -83,17 +83,17 @@ namespace NKikimr::NOlap { } }; + static inline INormalizerComponent::TFactory::TRegistrator Registrator = INormalizerComponent::TFactory::TRegistrator(ENormalizerSequentialId::Chunks); public: - TChunksNormalizer(TTabletStorageInfo* info) - : DsGroupSelector(info) + TChunksNormalizer(const TNormalizationController::TInitContext& info) + : DsGroupSelector(info.GetStorageInfo()) {} - virtual const TString& GetName() const override { - const static TString name = 
"TChunksNormalizer"; - return name; + virtual ENormalizerSequentialId GetType() const override { + return ENormalizerSequentialId::Chunks; } - virtual TConclusion> Init(const TNormalizationController& controller, NTabletFlatExecutor::TTransactionContext& txc) override; + virtual TConclusion> DoInit(const TNormalizationController& controller, NTabletFlatExecutor::TTransactionContext& txc) override; private: NColumnShard::TBlobGroupSelector DsGroupSelector; diff --git a/ydb/core/tx/columnshard/normalizer/portion/clean.cpp b/ydb/core/tx/columnshard/normalizer/portion/clean.cpp new file mode 100644 index 000000000000..8ee71df72d25 --- /dev/null +++ b/ydb/core/tx/columnshard/normalizer/portion/clean.cpp @@ -0,0 +1,91 @@ +#include "clean.h" + +#include +#include +#include +#include + +#include + + +namespace NKikimr::NOlap { + +class TBlobsRemovingResult : public INormalizerChanges { + std::shared_ptr RemovingAction; + std::vector> Portions; +public: + TBlobsRemovingResult(std::shared_ptr removingAction, std::vector>&& portions) + : RemovingAction(removingAction) + , Portions(std::move(portions)) + {} + + bool ApplyOnExecute(NTabletFlatExecutor::TTransactionContext& txc, const TNormalizationController& /* normController */) const override { + NOlap::TBlobManagerDb blobManagerDb(txc.DB); + RemovingAction->OnExecuteTxAfterRemoving(blobManagerDb, true); + + TDbWrapper db(txc.DB, nullptr); + for (auto&& portion : Portions) { + AFL_CRIT(NKikimrServices::TX_COLUMNSHARD)("message", "remove lost portion")("path_id", portion->GetPathId())("portion_id", portion->GetPortionId()); + portion->RemoveFromDatabase(db); + } + return true; + } + + void ApplyOnComplete(const TNormalizationController& /* normController */) const override { + RemovingAction->OnCompleteTxAfterRemoving(true); + } + + ui64 GetSize() const override { + return Portions.size(); + } +}; + +class TBlobsRemovingTask : public INormalizerTask { + std::vector Blobs; + std::vector> Portions; +public: + 
TBlobsRemovingTask(std::vector&& blobs, std::vector>&& portions) + : Blobs(std::move(blobs)) + , Portions(std::move(portions)) + {} + + void Start(const TNormalizationController& controller, const TNormalizationContext& nCtx) override { + controller.GetCounters().CountObjects(Blobs.size()); + auto removeAction = controller.GetStoragesManager()->GetDefaultOperator()->StartDeclareRemovingAction(NBlobOperations::EConsumer::NORMALIZER); + for (auto&& blobId : Blobs) { + removeAction->DeclareSelfRemove(blobId); + } + TActorContext::AsActorContext().Send(nCtx.GetShardActor(), std::make_unique(std::make_shared(removeAction, std::move(Portions)))); + } +}; + + +bool TCleanPortionsNormalizer::CheckPortion(const NColumnShard::TTablesManager& tablesManager, const TPortionInfo& portionInfo) const { + return tablesManager.HasTable(portionInfo.GetAddress().GetPathId(), true); +} + +INormalizerTask::TPtr TCleanPortionsNormalizer::BuildTask(std::vector>&& portions, std::shared_ptr> schemas) const { + std::vector blobIds; + THashMap> blobsByStorage; + for (auto&& portion : portions) { + auto schemaPtr = schemas->FindPtr(portion->GetPortionId()); + portion->FillBlobIdsByStorage(blobsByStorage, schemaPtr->get()->GetIndexInfo()); + } + for (auto&& [storageId, blobs] : blobsByStorage) { + if (storageId == NBlobOperations::TGlobal::DefaultStorageId) { + for (auto&& blobId : blobs) { + blobIds.emplace_back(blobId); + } + } else { + AFL_VERIFY(false)("details", "Invalid storage for normalizer"); + } + } + return std::make_shared(std::move(blobIds), std::move(portions)); +} + + TConclusion TCleanPortionsNormalizer::DoInitImpl(const TNormalizationController&, NTabletFlatExecutor::TTransactionContext&) { + return true; +} + + +} diff --git a/ydb/core/tx/columnshard/normalizer/portion/clean.h b/ydb/core/tx/columnshard/normalizer/portion/clean.h new file mode 100644 index 000000000000..dec0b6c8eacd --- /dev/null +++ b/ydb/core/tx/columnshard/normalizer/portion/clean.h @@ -0,0 +1,39 @@ +#pragma 
once + +#include "normalizer.h" +#include +#include + + +namespace NKikimr::NColumnShard { + class TTablesManager; +} + +namespace NKikimr::NOlap { + +class TCleanPortionsNormalizer : public TPortionsNormalizerBase { + static inline TFactory::TRegistrator Registrator = TFactory::TRegistrator(ENormalizerSequentialId::PortionsCleaner); +public: + class TNormalizerResult; + class TTask; + +public: + TCleanPortionsNormalizer(const TNormalizationController::TInitContext& info) + : TPortionsNormalizerBase(info) + {} + + virtual ENormalizerSequentialId GetType() const override { + return ENormalizerSequentialId::PortionsCleaner; + } + + virtual std::set GetColumnsFilter(const ISnapshotSchema::TPtr& /*schema*/) const override { + return {}; + } + + virtual INormalizerTask::TPtr BuildTask(std::vector>&& portions, std::shared_ptr> schemas) const override; + virtual TConclusion DoInitImpl(const TNormalizationController& controller, NTabletFlatExecutor::TTransactionContext& txc) override; + + virtual bool CheckPortion(const NColumnShard::TTablesManager& tablesManager, const TPortionInfo& portionInfo) const override; +}; + +} diff --git a/ydb/core/tx/columnshard/normalizer/portion/min_max.cpp b/ydb/core/tx/columnshard/normalizer/portion/min_max.cpp deleted file mode 100644 index 974efa329712..000000000000 --- a/ydb/core/tx/columnshard/normalizer/portion/min_max.cpp +++ /dev/null @@ -1,221 +0,0 @@ -#include "min_max.h" -#include "normalizer.h" - -#include -#include -#include - -#include - - -namespace NKikimr::NOlap { - -class TMinMaxSnapshotChangesTask: public NConveyor::ITask { -public: - using TDataContainer = std::vector>; -private: - THashMap Blobs; - TDataContainer Portions; - std::shared_ptr> Schemas; - TNormalizationContext NormContext; -protected: - virtual bool DoExecute() override { - Y_ABORT_UNLESS(!Schemas->empty()); - auto pkColumnIds = Schemas->begin()->second->GetPkColumnsIds(); - pkColumnIds.insert(TIndexInfo::GetSpecialColumnIds().begin(), 
TIndexInfo::GetSpecialColumnIds().end()); - - for (auto&& portionInfo : Portions) { - auto blobSchema = Schemas->FindPtr(portionInfo->GetPortionId()); - THashMap blobsDataAssemble; - for (auto&& i : portionInfo->Records) { - auto blobIt = Blobs.find(i.BlobRange); - Y_ABORT_UNLESS(blobIt != Blobs.end()); - blobsDataAssemble.emplace(i.BlobRange, blobIt->second); - } - - AFL_VERIFY(!!blobSchema)("details", portionInfo->DebugString()); - auto filteredSchema = std::make_shared(*blobSchema, pkColumnIds); - auto preparedBatch = portionInfo->PrepareForAssemble(**blobSchema, *filteredSchema, blobsDataAssemble); - auto batch = preparedBatch.Assemble(); - Y_ABORT_UNLESS(!!batch); - portionInfo->AddMetadata(**blobSchema, batch, portionInfo->GetMeta().GetTierName()); - } - - auto changes = std::make_shared(std::move(Portions)); - TActorContext::AsActorContext().Send(NormContext.GetColumnshardActor(), std::make_unique(changes)); - return true; - } - -public: - TMinMaxSnapshotChangesTask(THashMap&& blobs, const TNormalizationContext& nCtx, TDataContainer&& portions, std::shared_ptr> schemas) - : Blobs(std::move(blobs)) - , Portions(std::move(portions)) - , Schemas(schemas) - , NormContext(nCtx) - {} - - virtual TString GetTaskClassIdentifier() const override { - const static TString name = "TMinMaxSnapshotChangesTask"; - return name; - } - - static void FillBlobRanges(std::shared_ptr readAction, const std::shared_ptr& portion) { - for (auto&& chunk : portion->Records) { - readAction->AddRange(chunk.BlobRange); - } - } - - static ui64 GetMemSize(const std::shared_ptr& portion) { - return portion->GetRawBytes(); - } - - static bool CheckPortion(const TPortionInfo& portionInfo) { - if (!portionInfo.GetMeta().HasPrimaryKeyBorders() || !portionInfo.GetMeta().HasSnapshotMinMax()) { - return false; - } - return true; - } - - static std::set GetColumnsFilter(const ISnapshotSchema::TPtr& schema) { - return schema->GetPkColumnsIds(); - } -}; - - -class 
TPortionsNormalizer::TNormalizerResult : public INormalizerChanges { - TMinMaxSnapshotChangesTask::TDataContainer Portions; -public: - TNormalizerResult(TMinMaxSnapshotChangesTask::TDataContainer&& portions) - : Portions(std::move(portions)) - {} - - bool Apply(NTabletFlatExecutor::TTransactionContext& txc, const TNormalizationController& /* normController */) const override { - using namespace NColumnShard; - NIceDb::TNiceDb db(txc.DB); - - for (auto&& portionInfo : Portions) { - for (auto&& chunk : portionInfo->Records) { - auto proto = portionInfo->GetMeta().SerializeToProto(chunk.ColumnId, chunk.Chunk); - if (!proto) { - continue; - } - auto rowProto = chunk.GetMeta().SerializeToProto(); - *rowProto.MutablePortionMeta() = std::move(*proto); - - db.Table().Key(0, portionInfo->GetDeprecatedGranuleId(), chunk.ColumnId, - portionInfo->GetMinSnapshot().GetPlanStep(), portionInfo->GetMinSnapshot().GetTxId(), portionInfo->GetPortion(), chunk.Chunk).Update( - NIceDb::TUpdate(rowProto.SerializeAsString()) - ); - } - } - return true; - } -}; - -TConclusion> TPortionsNormalizer::Init(const TNormalizationController& controller, NTabletFlatExecutor::TTransactionContext& txc) { - using namespace NColumnShard; - std::vector tasks; - - NIceDb::TNiceDb db(txc.DB); - - bool ready = true; - ready = ready & Schema::Precharge(db, txc.DB.GetScheme()); - if (!ready) { - return TConclusionStatus::Fail("Not ready"); - } - - TTablesManager tablesManager(controller.GetStoragesManager(), 0); - if (!tablesManager.InitFromDB(db)) { - ACFL_ERROR("normalizer", "TPortionsNormalizer")("error", "can't initialize tables manager"); - return TConclusionStatus::Fail("Can't load index"); - } - - if (!tablesManager.HasPrimaryIndex()) { - return tasks; - } - - THashMap> portions; - auto schemas = std::make_shared>(); - auto pkColumnIds = TMinMaxSnapshotChangesTask::GetColumnsFilter(tablesManager.GetPrimaryIndexSafe().GetVersionedIndex().GetLastSchema()); - - { - auto rowset = db.Table().Select(); - if 
(!rowset.IsReady()) { - return TConclusionStatus::Fail("Not ready"); - } - - TSnapshot lastSnapshot(0, 0); - ISnapshotSchema::TPtr currentSchema; - auto initPortionCB = [&](const TPortionInfo& portion, const TColumnChunkLoadContext& loadContext) { - if (!currentSchema || lastSnapshot != portion.GetMinSnapshot()) { - currentSchema = tablesManager.GetPrimaryIndexSafe().GetVersionedIndex().GetSchema(portion.GetMinSnapshot()); - lastSnapshot = portion.GetMinSnapshot(); - } - - AFL_VERIFY(portion.ValidSnapshotInfo())("details", portion.DebugString()); - TColumnRecord rec(loadContext, currentSchema->GetIndexInfo()); - if (!pkColumnIds.contains(rec.ColumnId)) { - return; - } - auto it = portions.find(portion.GetPortion()); - auto portionMeta = loadContext.GetPortionMeta(); - if (it == portions.end()) { - Y_ABORT_UNLESS(portion.Records.empty()); - (*schemas)[portion.GetPortionId()] = currentSchema; - auto portionNew = std::make_shared(portion); - portionNew->AddRecord(currentSchema->GetIndexInfo(), rec, portionMeta); - it = portions.emplace(portion.GetPortion(), portionNew).first; - } else { - AFL_VERIFY(it->second->IsEqualWithSnapshots(portion))("self", it->second->DebugString())("item", portion.DebugString()); - it->second->AddRecord(currentSchema->GetIndexInfo(), rec, portionMeta); - } - }; - - while (!rowset.EndOfSet()) { - TPortionInfo portion = TPortionInfo::BuildEmpty(); - auto index = rowset.GetValue(); - Y_ABORT_UNLESS(index == 0); - - portion.SetPathId(rowset.GetValue()); - - portion.SetMinSnapshot(rowset.GetValue(), rowset.GetValue()); - portion.SetPortion(rowset.GetValue()); - portion.SetDeprecatedGranuleId(rowset.GetValue()); - portion.SetRemoveSnapshot(rowset.GetValue(), rowset.GetValue()); - - NOlap::TColumnChunkLoadContext chunkLoadContext(rowset, &DsGroupSelector); - initPortionCB(portion, chunkLoadContext); - - if (!rowset.Next()) { - return TConclusionStatus::Fail("Not ready"); - } - } - } - - std::vector> package; - package.reserve(100); - - ui64 
brokenPortioncCount = 0; - for (auto&& portion : portions) { - if (TMinMaxSnapshotChangesTask::CheckPortion(*portion.second)) { - continue; - } - ++brokenPortioncCount; - package.emplace_back(portion.second); - if (package.size() == 1000) { - std::vector> local; - local.swap(package); - tasks.emplace_back(std::make_shared>(std::move(local), schemas)); - } - } - - if (package.size() > 0) { - tasks.emplace_back(std::make_shared>(std::move(package), schemas)); - } - - AtomicSet(ActiveTasksCount, tasks.size()); - ACFL_INFO("normalizer", "TPortionsNormalizer")("message", TStringBuilder() << brokenPortioncCount << " portions found"); - return tasks; -} - -} diff --git a/ydb/core/tx/columnshard/normalizer/portion/min_max.h b/ydb/core/tx/columnshard/normalizer/portion/min_max.h deleted file mode 100644 index 6b7f8cd1eb97..000000000000 --- a/ydb/core/tx/columnshard/normalizer/portion/min_max.h +++ /dev/null @@ -1,35 +0,0 @@ -#pragma once - -#include -#include - -#include - - -namespace NKikimr::NColumnShard { - class TTablesManager; -} - -namespace NKikimr::NOlap { - -class TPortionsNormalizer : public INormalizerComponent { -public: - class TNormalizerResult; - -public: - TPortionsNormalizer(TTabletStorageInfo* info) - : DsGroupSelector(info) - {} - - virtual const TString& GetName() const override { - const static TString name = "TPortionsNormalizer"; - return name; - } - - virtual TConclusion> Init(const TNormalizationController& controller, NTabletFlatExecutor::TTransactionContext& txc) override; - -private: - NColumnShard::TBlobGroupSelector DsGroupSelector; -}; - -} diff --git a/ydb/core/tx/columnshard/normalizer/portion/normalizer.cpp b/ydb/core/tx/columnshard/normalizer/portion/normalizer.cpp index 2102bd3260f7..c1a3dc6399fb 100644 --- a/ydb/core/tx/columnshard/normalizer/portion/normalizer.cpp +++ b/ydb/core/tx/columnshard/normalizer/portion/normalizer.cpp @@ -1,5 +1,111 @@ #include "normalizer.h" +#include +#include +#include + namespace NKikimr::NOlap { - + 
+TConclusion> TPortionsNormalizerBase::DoInit(const TNormalizationController& controller, NTabletFlatExecutor::TTransactionContext& txc) { + auto initRes = DoInitImpl(controller,txc); + + if (initRes.IsFail()) { + return initRes; + } + + using namespace NColumnShard; + + NIceDb::TNiceDb db(txc.DB); + bool ready = true; + ready = ready & Schema::Precharge(db, txc.DB.GetScheme()); + if (!ready) { + return TConclusionStatus::Fail("Not ready"); + } + + NColumnShard::TTablesManager tablesManager(controller.GetStoragesManager(), 0); + if (!tablesManager.InitFromDB(db)) { + ACFL_ERROR("normalizer", "TPortionsNormalizer")("error", "can't initialize tables manager"); + return TConclusionStatus::Fail("Can't load index"); + } + + std::vector tasks; + if (!tablesManager.HasPrimaryIndex()) { + return tasks; + } + + auto columnsFilter = GetColumnsFilter(tablesManager.GetPrimaryIndexSafe().GetVersionedIndex().GetLastSchema()); + + THashMap> portions; + auto schemas = std::make_shared>(); + + { + auto rowset = db.Table().Select(); + if (!rowset.IsReady()) { + return TConclusionStatus::Fail("Not ready"); + } + + TPortionInfo::TSchemaCursor schema(tablesManager.GetPrimaryIndexSafe().GetVersionedIndex()); + auto initPortionCB = [&](const TPortionInfo& portion, const TColumnChunkLoadContext& loadContext) { + auto currentSchema = schema.GetSchema(portion); + AFL_VERIFY(portion.ValidSnapshotInfo())("details", portion.DebugString()); + + if (!columnsFilter.empty() && !columnsFilter.contains(loadContext.GetAddress().GetColumnId())) { + return; + } + auto it = portions.find(portion.GetPortionId()); + if (it == portions.end()) { + Y_ABORT_UNLESS(portion.Records.empty()); + (*schemas)[portion.GetPortionId()] = currentSchema; + it = portions.emplace(portion.GetPortionId(), std::make_shared(portion)).first; + } + TColumnRecord rec(it->second->RegisterBlobId(loadContext.GetBlobRange().GetBlobId()), loadContext, 
currentSchema->GetIndexInfo().GetColumnFeaturesVerified(loadContext.GetAddress().GetColumnId())); + AFL_VERIFY(it->second->IsEqualWithSnapshots(portion))("self", it->second->DebugString())("item", portion.DebugString()); + auto portionMeta = loadContext.GetPortionMeta(); + it->second->AddRecord(currentSchema->GetIndexInfo(), rec, portionMeta); + }; + + while (!rowset.EndOfSet()) { + TPortionInfo portion = TPortionInfo::BuildEmpty(); + auto index = rowset.GetValue(); + Y_ABORT_UNLESS(index == 0); + + portion.SetPathId(rowset.GetValue()); + portion.SetMinSnapshotDeprecated(NOlap::TSnapshot(rowset.GetValue(), rowset.GetValue())); + portion.SetPortion(rowset.GetValue()); + portion.SetDeprecatedGranuleId(rowset.GetValue()); + portion.SetRemoveSnapshot(rowset.GetValue(), rowset.GetValue()); + + NOlap::TColumnChunkLoadContext chunkLoadContext(rowset, &DsGroupSelector); + initPortionCB(portion, chunkLoadContext); + + if (!rowset.Next()) { + return TConclusionStatus::Fail("Not ready"); + } + } + } + + std::vector> package; + package.reserve(100); + + ui64 brokenPortioncCount = 0; + for (auto&& [_, portionInfo] : portions) { + if (CheckPortion(tablesManager, *portionInfo)) { + continue; + } + ++brokenPortioncCount; + package.emplace_back(portionInfo); + if (package.size() == 1000) { + std::vector> local; + local.swap(package); + tasks.emplace_back(BuildTask(std::move(local), schemas)); + } + } + + if (package.size() > 0) { + tasks.emplace_back(BuildTask(std::move(package), schemas)); + } + ACFL_INFO("normalizer", "TPortionsNormalizer")("message", TStringBuilder() << brokenPortioncCount << " portions found"); + return tasks; +} + } diff --git a/ydb/core/tx/columnshard/normalizer/portion/normalizer.h b/ydb/core/tx/columnshard/normalizer/portion/normalizer.h index 7c86476e724f..574a1c212873 100644 --- a/ydb/core/tx/columnshard/normalizer/portion/normalizer.h +++ b/ydb/core/tx/columnshard/normalizer/portion/normalizer.h @@ -2,6 +2,7 @@ #include #include +#include #include 
#include @@ -11,6 +12,9 @@ #include +namespace NKikimr::NColumnShard { + class TTablesManager; +} namespace NKikimr::NOlap { template @@ -37,8 +41,8 @@ class TReadPortionsTask: public NOlap::NBlobOperations::NRead::ITask { NConveyor::TCompServiceOperator::SendTaskToExecute(task); } - virtual bool DoOnError(const TBlobRange& range, const IBlobsReadingAction::TErrorStatus& status) override { - Y_UNUSED(status, range); + virtual bool DoOnError(const TString& storageId, const TBlobRange& range, const IBlobsReadingAction::TErrorStatus& status) override { + Y_UNUSED(status, range, storageId); return false; } @@ -62,7 +66,7 @@ class TPortionsNormalizerTask : public INormalizerTask { void Start(const TNormalizationController& controller, const TNormalizationContext& nCtx) override { controller.GetCounters().CountObjects(Package.size()); - auto readingAction = controller.GetStoragesManager()->GetInsertOperator()->StartReadingAction("CS::NORMALIZER"); + auto readingAction = controller.GetStoragesManager()->GetInsertOperator()->StartReadingAction(NBlobOperations::EConsumer::NORMALIZER); ui64 memSize = 0; for (auto&& data : Package) { TConveyorTask::FillBlobRanges(readingAction, data); @@ -74,4 +78,27 @@ class TPortionsNormalizerTask : public INormalizerTask { std::make_shared>(nCtx, actions, std::move(Package), Schemas), 1, memSize, "CS::NORMALIZER", controller.GetTaskSubscription())); } }; + +class TPortionsNormalizerBase : public TNormalizationController::INormalizerComponent { +public: + TPortionsNormalizerBase(const TNormalizationController::TInitContext& info) + : DsGroupSelector(info.GetStorageInfo()) + {} + + virtual TConclusion> DoInit(const TNormalizationController& controller, NTabletFlatExecutor::TTransactionContext& txc) override final; + +protected: + virtual INormalizerTask::TPtr BuildTask(std::vector>&& portions, std::shared_ptr> schemas) const = 0; + virtual TConclusion DoInitImpl(const TNormalizationController& controller, 
NTabletFlatExecutor::TTransactionContext& txc) = 0; + + virtual bool CheckPortion(const NColumnShard::TTablesManager& tablesManager, const TPortionInfo& /*portionInfo*/) const = 0; + + virtual std::set GetColumnsFilter(const ISnapshotSchema::TPtr& schema) const { + return schema->GetPkColumnsIds(); + } + +private: + NColumnShard::TBlobGroupSelector DsGroupSelector; +}; + } diff --git a/ydb/core/tx/columnshard/normalizer/portion/portion.cpp b/ydb/core/tx/columnshard/normalizer/portion/portion.cpp new file mode 100644 index 000000000000..739715f44125 --- /dev/null +++ b/ydb/core/tx/columnshard/normalizer/portion/portion.cpp @@ -0,0 +1,75 @@ +#include "portion.h" + +#include +#include +#include + +#include + + +namespace NKikimr::NOlap { + +class TPortionsNormalizer::TNormalizerResult : public INormalizerChanges { + std::vector> Portions; + std::shared_ptr> Schemas; +public: + TNormalizerResult(std::vector>&& portions, std::shared_ptr> schemas) + : Portions(std::move(portions)) + , Schemas(schemas) + {} + + bool ApplyOnExecute(NTabletFlatExecutor::TTransactionContext& txc, const TNormalizationController& /* normController */) const override { + using namespace NColumnShard; + TDbWrapper db(txc.DB, nullptr); + + for (auto&& portionInfo : Portions) { + auto schema = Schemas->FindPtr(portionInfo->GetPortionId()); + AFL_VERIFY(!!schema)("portion_id", portionInfo->GetPortionId()); + portionInfo->SaveToDatabase(db, (*schema)->GetIndexInfo().GetPKFirstColumnId(), true); + } + return true; + } + + ui64 GetSize() const override { + return Portions.size(); + } +}; + +bool TPortionsNormalizer::CheckPortion(const NColumnShard::TTablesManager&, const TPortionInfo& portionInfo) const { + return KnownPortions.contains(portionInfo.GetAddress()); +} + +INormalizerTask::TPtr TPortionsNormalizer::BuildTask(std::vector>&& portions, std::shared_ptr> schemas) const { + return std::make_shared(std::make_shared(std::move(portions), schemas)); +} + + TConclusion 
TPortionsNormalizer::DoInitImpl(const TNormalizationController&, NTabletFlatExecutor::TTransactionContext& txc) { + using namespace NColumnShard; + + NIceDb::TNiceDb db(txc.DB); + bool ready = true; + ready = ready & Schema::Precharge(db, txc.DB.GetScheme()); + if (!ready) { + return TConclusionStatus::Fail("Not ready"); + } + + { + auto rowset = db.Table().Select(); + if (!rowset.IsReady()) { + return TConclusionStatus::Fail("Not ready"); + } + + while (!rowset.EndOfSet()) { + TPortionAddress portionAddr(rowset.GetValue(), rowset.GetValue()); + KnownPortions.insert(portionAddr); + if (!rowset.Next()) { + return TConclusionStatus::Fail("Not ready"); + } + } + } + + return true; +} + + +} diff --git a/ydb/core/tx/columnshard/normalizer/portion/portion.h b/ydb/core/tx/columnshard/normalizer/portion/portion.h new file mode 100644 index 000000000000..505d6fe8d90a --- /dev/null +++ b/ydb/core/tx/columnshard/normalizer/portion/portion.h @@ -0,0 +1,39 @@ +#pragma once + +#include "normalizer.h" +#include +#include + + +namespace NKikimr::NColumnShard { + class TTablesManager; +} + +namespace NKikimr::NOlap { + +class TPortionsNormalizer : public TPortionsNormalizerBase { + static inline TFactory::TRegistrator Registrator = TFactory::TRegistrator(ENormalizerSequentialId::PortionsMetadata); +public: + class TNormalizerResult; + class TTask; + +public: + TPortionsNormalizer(const TNormalizationController::TInitContext& info) + : TPortionsNormalizerBase(info) + {} + + virtual ENormalizerSequentialId GetType() const override { + return ENormalizerSequentialId::PortionsMetadata; + } + + virtual INormalizerTask::TPtr BuildTask(std::vector>&& portions, std::shared_ptr> schemas) const override; + virtual TConclusion DoInitImpl(const TNormalizationController& controller, NTabletFlatExecutor::TTransactionContext& txc) override; + + virtual bool CheckPortion(const NColumnShard::TTablesManager& tablesManager, const TPortionInfo& portionInfo) const override; + +private: + THashSet 
KnownPortions; + +}; + +} diff --git a/ydb/core/tx/columnshard/normalizer/portion/ya.make b/ydb/core/tx/columnshard/normalizer/portion/ya.make index 2dd2475a1987..9f6a67d026d7 100644 --- a/ydb/core/tx/columnshard/normalizer/portion/ya.make +++ b/ydb/core/tx/columnshard/normalizer/portion/ya.make @@ -2,8 +2,8 @@ LIBRARY() SRCS( normalizer.cpp - min_max.cpp - chunks.cpp + GLOBAL chunks.cpp + GLOBAL clean.cpp ) PEERDIR( diff --git a/ydb/core/tx/columnshard/normalizer/tables/normalizer.cpp b/ydb/core/tx/columnshard/normalizer/tables/normalizer.cpp new file mode 100644 index 000000000000..63bd4c027edf --- /dev/null +++ b/ydb/core/tx/columnshard/normalizer/tables/normalizer.cpp @@ -0,0 +1,141 @@ +#include "normalizer.h" + +#include + +namespace NKikimr::NOlap { + +class TRemovedTablesNormalizer::TNormalizerResult : public INormalizerChanges { + struct TPathInfo { + ui64 PathId; + ui64 Step; + ui64 TxId; + }; + + std::vector PathIds; + +public: + TNormalizerResult(std::vector&& pathIds) + : PathIds(std::move(pathIds)) + {} + +public: + bool ApplyOnExecute(NTabletFlatExecutor::TTransactionContext& txc, const TNormalizationController& /* normController */) const override { + using namespace NColumnShard; + NIceDb::TNiceDb db(txc.DB); + + for (auto&& pathInfo: PathIds) { + db.Table().Key(pathInfo.PathId, pathInfo.Step, pathInfo.TxId).Delete(); + db.Table().Key(pathInfo.PathId).Delete(); + } + return true; + } + + ui64 GetSize() const override { + return PathIds.size(); + } + + static std::optional> Init(NTabletFlatExecutor::TTransactionContext& txc) { + using namespace NColumnShard; + NIceDb::TNiceDb db(txc.DB); + + bool ready = true; + ready = ready & Schema::Precharge(db, txc.DB.GetScheme()); + ready = ready & Schema::Precharge(db, txc.DB.GetScheme()); + ready = ready & Schema::Precharge(db, txc.DB.GetScheme()); + if (!ready) { + return std::nullopt; + } + + std::set notEmptyPaths; + { + auto rowset = db.Table().Select(); + if (!rowset.IsReady()) { + return std::nullopt; + 
} + + while (!rowset.EndOfSet()) { + const auto pathId = rowset.GetValue(); + notEmptyPaths.emplace(pathId); + + if (!rowset.Next()) { + return std::nullopt; + } + } + } + + + std::set droppedTables; + { + auto rowset = db.Table().Select(); + if (!rowset.IsReady()) { + return std::nullopt; + } + + while (!rowset.EndOfSet()) { + const auto pathId = rowset.GetValue(); + const NOlap::TSnapshot dropSnapshot(rowset.GetValue(), rowset.GetValue()); + + if (dropSnapshot.Valid() && !notEmptyPaths.contains(pathId)) { + droppedTables.emplace(pathId); + } + + if (!rowset.Next()) { + return std::nullopt; + } + } + } + + std::vector changes; + ui64 fullCount = 0; + + { + auto rowset = db.Table().Select(); + if (!rowset.IsReady()) { + return std::nullopt; + } + + std::vector toRemove; + while (!rowset.EndOfSet()) { + TPathInfo pathInfo; + pathInfo.PathId = rowset.GetValue(); + if (droppedTables.contains(pathInfo.PathId)) { + pathInfo.Step = rowset.GetValue(); + pathInfo.TxId = rowset.GetValue(); + toRemove.emplace_back(pathInfo); + ++fullCount; + } + + if (toRemove.size() == 10000) { + changes.emplace_back(std::make_shared(std::move(toRemove))); + toRemove.clear(); + } + + if (!rowset.Next()) { + return std::nullopt; + } + } + + if (toRemove.size() > 0) { + changes.emplace_back(std::make_shared(std::move(toRemove))); + } + } + + ACFL_INFO("normalizer", "TGranulesNormalizer")("message", TStringBuilder() << fullCount << " chunks found"); + return changes; + } + +}; + +TConclusion> TRemovedTablesNormalizer::DoInit(const TNormalizationController& /*controller*/, NTabletFlatExecutor::TTransactionContext& txc) { + auto changes = TNormalizerResult::Init(txc); + if (!changes) { + return TConclusionStatus::Fail("Not ready");; + } + std::vector tasks; + for (auto&& c : *changes) { + tasks.emplace_back(std::make_shared(c)); + } + return tasks; +} + +} diff --git a/ydb/core/tx/columnshard/normalizer/tables/normalizer.h b/ydb/core/tx/columnshard/normalizer/tables/normalizer.h new file mode 
100644 index 000000000000..b22b6e39b21d --- /dev/null +++ b/ydb/core/tx/columnshard/normalizer/tables/normalizer.h @@ -0,0 +1,23 @@ +#pragma once + +#include +#include + + +namespace NKikimr::NOlap { + +class TRemovedTablesNormalizer: public TNormalizationController::INormalizerComponent { + static inline INormalizerComponent::TFactory::TRegistrator Registrator = INormalizerComponent::TFactory::TRegistrator(ENormalizerSequentialId::TablesCleaner); + class TNormalizerResult; +public: + TRemovedTablesNormalizer(const TNormalizationController::TInitContext&) + {} + + virtual ENormalizerSequentialId GetType() const override { + return ENormalizerSequentialId::TablesCleaner; + } + + virtual TConclusion> DoInit(const TNormalizationController& controller, NTabletFlatExecutor::TTransactionContext& txc) override; +}; + +} diff --git a/ydb/core/tx/columnshard/normalizer/tables/ya.make b/ydb/core/tx/columnshard/normalizer/tables/ya.make new file mode 100644 index 000000000000..d44051621eb3 --- /dev/null +++ b/ydb/core/tx/columnshard/normalizer/tables/ya.make @@ -0,0 +1,11 @@ +LIBRARY() + +SRCS( + GLOBAL normalizer.cpp +) + +PEERDIR( + ydb/core/tx/columnshard/normalizer/abstract +) + +END() diff --git a/ydb/core/tx/columnshard/operations/write.cpp b/ydb/core/tx/columnshard/operations/write.cpp index b580b2c2f340..38007b1f96a3 100644 --- a/ydb/core/tx/columnshard/operations/write.cpp +++ b/ydb/core/tx/columnshard/operations/write.cpp @@ -13,11 +13,12 @@ namespace NKikimr::NColumnShard { - TWriteOperation::TWriteOperation(const TWriteId writeId, const ui64 txId, const EOperationStatus& status, const TInstant createdAt) + TWriteOperation::TWriteOperation(const TWriteId writeId, const ui64 lockId, const ui64 cookie, const EOperationStatus& status, const TInstant createdAt) : Status(status) , CreatedAt(createdAt) , WriteId(writeId) - , TxId(txId) + , LockId(lockId) + , Cookie(cookie) { } @@ -26,7 +27,8 @@ namespace NKikimr::NColumnShard { NEvWrite::TWriteMeta 
writeMeta((ui64)WriteId, tableId, source); std::shared_ptr task = std::make_shared(owner.TabletID(), ctx.SelfID, owner.BufferizationWriteActorId, - NEvWrite::TWriteData(writeMeta, data, owner.TablesManager.GetPrimaryIndex()->GetReplaceKey(), owner.StoragesManager->GetInsertOperator()->StartWritingAction("WRITING_OPERATOR"))); + NEvWrite::TWriteData(writeMeta, data, owner.TablesManager.GetPrimaryIndex()->GetReplaceKey(), + owner.StoragesManager->GetInsertOperator()->StartWritingAction(NOlap::NBlobOperations::EConsumer::WRITING_OPERATOR))); NConveyor::TCompServiceOperator::SendTaskToExecute(task); Status = EOperationStatus::Started; @@ -57,8 +59,21 @@ namespace NKikimr::NColumnShard { Y_ABORT_UNLESS(Status == EOperationStatus::Started); Status = EOperationStatus::Prepared; GlobalWriteIds = globalWriteIds; + NIceDb::TNiceDb db(txc.DB); - Schema::Operations_Write(db, *this); + NKikimrTxColumnShard::TInternalOperationData proto; + ToProto(proto); + + TString metadata; + Y_ABORT_UNLESS(proto.SerializeToString(&metadata)); + + db.Table().Key((ui64)WriteId).Update( + NIceDb::TUpdate((ui32)Status), + NIceDb::TUpdate(CreatedAt.Seconds()), + NIceDb::TUpdate(metadata), + NIceDb::TUpdate(LockId), + NIceDb::TUpdate(Cookie) + ); } void TWriteOperation::ToProto(NKikimrTxColumnShard::TInternalOperationData& proto) const { @@ -86,46 +101,67 @@ namespace NKikimr::NColumnShard { bool TOperationsManager::Load(NTabletFlatExecutor::TTransactionContext& txc) { NIceDb::TNiceDb db(txc.DB); - auto rowset = db.Table().Select(); - if (!rowset.IsReady()) { - return false; - } - - while (!rowset.EndOfSet()) { - const TWriteId writeId = (TWriteId) rowset.GetValue(); - const ui64 createdAtSec = rowset.GetValue(); - const ui64 txId = rowset.GetValue(); - const TString metadata = rowset.GetValue(); - NKikimrTxColumnShard::TInternalOperationData metaProto; - Y_ABORT_UNLESS(metaProto.ParseFromString(metadata)); - const EOperationStatus status = (EOperationStatus) rowset.GetValue(); - - auto operation 
= std::make_shared(writeId, txId, status, TInstant::Seconds(createdAtSec)); - operation->FromProto(metaProto); - - Y_ABORT_UNLESS(operation->GetStatus() != EOperationStatus::Draft); - - auto [_, isOk] = Operations.emplace(operation->GetWriteId(), operation); - if (!isOk) { - AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("event", "duplicated_operation")("operation", *operation); + { + auto rowset = db.Table().Select(); + if (!rowset.IsReady()) { return false; } - Transactions[txId].push_back(operation->GetWriteId()); - LastWriteId = std::max(LastWriteId, operation->GetWriteId()); - if (!rowset.Next()) { + + while (!rowset.EndOfSet()) { + const TWriteId writeId = (TWriteId) rowset.GetValue(); + const ui64 createdAtSec = rowset.GetValue(); + const ui64 lockId = rowset.GetValue(); + const ui64 cookie = rowset.GetValueOrDefault(0); + const TString metadata = rowset.GetValue(); + const EOperationStatus status = (EOperationStatus) rowset.GetValue(); + + NKikimrTxColumnShard::TInternalOperationData metaProto; + Y_ABORT_UNLESS(metaProto.ParseFromString(metadata)); + + auto operation = std::make_shared(writeId, lockId, cookie, status, TInstant::Seconds(createdAtSec)); + operation->FromProto(metaProto); + AFL_VERIFY(operation->GetStatus() != EOperationStatus::Draft); + + auto [_, isOk] = Operations.emplace(operation->GetWriteId(), operation); + if (!isOk) { + AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("event", "duplicated_operation")("operation", *operation); + return false; + } + Locks[lockId].push_back(operation->GetWriteId()); + LastWriteId = std::max(LastWriteId, operation->GetWriteId()); + if (!rowset.Next()) { + return false; + } + } + } + { + auto rowset = db.Table().Select(); + if (!rowset.IsReady()) { return false; } + + while (!rowset.EndOfSet()) { + const ui64 lockId = rowset.GetValue(); + const ui64 txId = rowset.GetValue(); + AFL_VERIFY(Locks.contains(lockId))("lock_id", lockId); + Tx2Lock[txId] = lockId; + if (!rowset.Next()) { + return false; + } + } } return 
true; } bool TOperationsManager::CommitTransaction(TColumnShard& owner, const ui64 txId, NTabletFlatExecutor::TTransactionContext& txc, const NOlap::TSnapshot& snapshot) { - TLogContextGuard gLogging(NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD_SCAN)("tx_id", txId)("event", "transaction_commit_fails")); - auto tIt = Transactions.find(txId); - if (tIt == Transactions.end()) { - AFL_WARN(NKikimrServices::TX_COLUMNSHARD)("details", "skip_unknown_transaction"); + TLogContextGuard gLogging(NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("tx_id", txId)); + auto lockId = GetLockForTx(txId); + if (!lockId) { + ACFL_ERROR("details", "unknown_transaction"); return true; } + auto tIt = Locks.find(*lockId); + AFL_VERIFY(tIt != Locks.end())("tx_id", txId)("lock_id", *lockId); TVector commited; for (auto&& opId : tIt->second) { @@ -133,21 +169,20 @@ namespace NKikimr::NColumnShard { (*opPtr)->Commit(owner, txc, snapshot); commited.emplace_back(*opPtr); } - - Transactions.erase(txId); - for (auto&& op: commited) { - RemoveOperation(op, txc); - } + OnTransactionFinish(commited, txId, txc); return true; } bool TOperationsManager::AbortTransaction(TColumnShard& owner, const ui64 txId, NTabletFlatExecutor::TTransactionContext& txc) { - TLogContextGuard gLogging(NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD_SCAN)("tx_id", txId)("event", "transaction_abort_fails")); - auto tIt = Transactions.find(txId); - if (tIt == Transactions.end()) { - AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("details", "unknown_transaction"); + TLogContextGuard gLogging(NActors::TLogContextBuilder::Build(NKikimrServices::TX_COLUMNSHARD)("tx_id", txId)); + + auto lockId = GetLockForTx(txId); + if (!lockId) { + ACFL_ERROR("details", "unknown_transaction"); return true; } + auto tIt = Locks.find(*lockId); + AFL_VERIFY(tIt != Locks.end())("tx_id", txId)("lock_id", *lockId); TVector aborted; for (auto&& opId : tIt->second) { @@ -156,10 +191,7 @@ namespace 
NKikimr::NColumnShard { aborted.emplace_back(*opPtr); } - Transactions.erase(txId); - for (auto&& op: aborted) { - RemoveOperation(op, txc); - } + OnTransactionFinish(aborted, txId, txc); return true; } @@ -171,22 +203,66 @@ namespace NKikimr::NColumnShard { return it->second; } - void TOperationsManager::RemoveOperation(const TWriteOperation::TPtr& op, NTabletFlatExecutor::TTransactionContext& txc) { + void TOperationsManager::OnTransactionFinish(const TVector& operations, const ui64 txId, NTabletFlatExecutor::TTransactionContext& txc) { + auto lockId = GetLockForTx(txId); + AFL_VERIFY(!!lockId)("tx_id", txId); + Locks.erase(*lockId); + Tx2Lock.erase(txId); + for (auto&& op: operations) { + RemoveOperation(op, txc); + } NIceDb::TNiceDb db(txc.DB); + db.Table().Key(txId, *lockId).Delete(); + } + + void TOperationsManager::RemoveOperation(const TWriteOperation::TPtr& op, NTabletFlatExecutor::TTransactionContext& txc) { Operations.erase(op->GetWriteId()); - Schema::Operations_Erase(db, op->GetWriteId()); + NIceDb::TNiceDb db(txc.DB); + db.Table().Key((ui64)op->GetWriteId()).Delete(); } TWriteId TOperationsManager::BuildNextWriteId() { return ++LastWriteId; } - TWriteOperation::TPtr TOperationsManager::RegisterOperation(const ui64 txId) { + std::optional TOperationsManager::GetLockForTx(const ui64 txId) const { + auto lockIt = Tx2Lock.find(txId); + if (lockIt != Tx2Lock.end()) { + return lockIt->second; + } + return std::nullopt; + } + + void TOperationsManager::LinkTransaction(const ui64 lockId, const ui64 txId, NTabletFlatExecutor::TTransactionContext& txc) { + Tx2Lock[txId] = lockId; + NIceDb::TNiceDb db(txc.DB); + db.Table().Key(txId, lockId).Update(); + } + + TWriteOperation::TPtr TOperationsManager::RegisterOperation(const ui64 lockId, const ui64 cookie) { auto writeId = BuildNextWriteId(); - auto operation = std::make_shared(writeId, txId, EOperationStatus::Draft, AppData()->TimeProvider->Now()); + auto operation = std::make_shared(writeId, lockId, cookie, 
EOperationStatus::Draft, AppData()->TimeProvider->Now()); Y_ABORT_UNLESS(Operations.emplace(operation->GetWriteId(), operation).second); - - Transactions[operation->GetTxId()].push_back(operation->GetWriteId()); + Locks[operation->GetLockId()].push_back(operation->GetWriteId()); return operation; } + + EOperationBehaviour TOperationsManager::GetBehaviour(const NEvents::TDataEvents::TEvWrite& evWrite) { + if (evWrite.Record.HasLockTxId() && evWrite.Record.HasLockNodeId()) { + if (evWrite.Record.HasLocks() && evWrite.Record.GetLocks().GetOp() == NKikimrDataEvents::TKqpLocks::Commit) { + return EOperationBehaviour::CommitWriteLock; + } + + if (evWrite.Record.GetTxMode() == NKikimrDataEvents::TEvWrite::MODE_PREPARE) { + return EOperationBehaviour::WriteWithLock; + } + + return EOperationBehaviour::Undefined; + } + + if (evWrite.Record.HasTxId() && evWrite.Record.GetTxMode() == NKikimrDataEvents::TEvWrite::MODE_PREPARE) { + return EOperationBehaviour::InTxWrite; + } + return EOperationBehaviour::Undefined; + } } diff --git a/ydb/core/tx/columnshard/operations/write.h b/ydb/core/tx/columnshard/operations/write.h index 4107b7e301bc..08fe7724b39c 100644 --- a/ydb/core/tx/columnshard/operations/write.h +++ b/ydb/core/tx/columnshard/operations/write.h @@ -1,6 +1,7 @@ #pragma once #include +#include #include #include #include @@ -28,17 +29,26 @@ namespace NKikimr::NColumnShard { Prepared = 3 }; + enum class EOperationBehaviour : ui32 { + Undefined = 1, + InTxWrite = 2, + WriteWithLock = 3, + CommitWriteLock = 4 + }; + class TWriteOperation { YDB_READONLY(EOperationStatus, Status, EOperationStatus::Draft); YDB_READONLY_DEF(TInstant, CreatedAt); YDB_READONLY_DEF(TWriteId, WriteId); - YDB_READONLY(ui64, TxId, 0); + YDB_READONLY(ui64, LockId, 0); + YDB_READONLY(ui64, Cookie, 0); YDB_READONLY_DEF(TVector, GlobalWriteIds); + YDB_ACCESSOR(EOperationBehaviour, Behaviour, EOperationBehaviour::Undefined); public: using TPtr = std::shared_ptr; - TWriteOperation(const TWriteId writeId, 
const ui64 txId, const EOperationStatus& status, const TInstant createdAt); + TWriteOperation(const TWriteId writeId, const ui64 lockId, const ui64 cookie, const EOperationStatus& status, const TInstant createdAt); void Start(TColumnShard& owner, const ui64 tableId, const NEvWrite::IDataContainer::TPtr& data, const NActors::TActorId& source, const TActorContext& ctx); void OnWriteFinish(NTabletFlatExecutor::TTransactionContext& txc, const TVector& globalWriteIds); @@ -46,7 +56,7 @@ namespace NKikimr::NColumnShard { void Abort(TColumnShard& owner, NTabletFlatExecutor::TTransactionContext& txc) const; void Out(IOutputStream& out) const { - out << "write_id=" << (ui64) WriteId << ";tx_id=" << TxId; + out << "write_id=" << (ui64) WriteId << ";lock_id=" << LockId; } void ToProto(NKikimrTxColumnShard::TInternalOperationData& proto) const; @@ -54,7 +64,8 @@ namespace NKikimr::NColumnShard { }; class TOperationsManager { - TMap> Transactions; + TMap> Locks; + TMap Tx2Lock; TMap Operations; TWriteId LastWriteId = TWriteId(0); @@ -64,11 +75,16 @@ namespace NKikimr::NColumnShard { TWriteOperation::TPtr GetOperation(const TWriteId writeId) const; bool CommitTransaction(TColumnShard& owner, const ui64 txId, NTabletFlatExecutor::TTransactionContext& txc, const NOlap::TSnapshot& snapshot); bool AbortTransaction(TColumnShard& owner, const ui64 txId, NTabletFlatExecutor::TTransactionContext& txc); + void LinkTransaction(const ui64 lockId, const ui64 txId, NTabletFlatExecutor::TTransactionContext& txc); + std::optional GetLockForTx(const ui64 lockId) const; + + TWriteOperation::TPtr RegisterOperation(const ui64 lockId, const ui64 cookie); + static EOperationBehaviour GetBehaviour(const NEvents::TDataEvents::TEvWrite& evWrite); - TWriteOperation::TPtr RegisterOperation(const ui64 txId); private: TWriteId BuildNextWriteId(); void RemoveOperation(const TWriteOperation::TPtr& op, NTabletFlatExecutor::TTransactionContext& txc); + void OnTransactionFinish(const TVector& operations, const 
ui64 txId, NTabletFlatExecutor::TTransactionContext& txc); }; } diff --git a/ydb/core/tx/columnshard/operations/write_data.cpp b/ydb/core/tx/columnshard/operations/write_data.cpp index b81b3839d7af..884e953dfb80 100644 --- a/ydb/core/tx/columnshard/operations/write_data.cpp +++ b/ydb/core/tx/columnshard/operations/write_data.cpp @@ -5,7 +5,7 @@ namespace NKikimr::NColumnShard { -bool TArrowData::Parse(const NKikimrDataEvents::TEvWrite_TOperation& proto, const NEvWrite::IPayloadData& payload) { +bool TArrowData::Parse(const NKikimrDataEvents::TEvWrite_TOperation& proto, const NEvWrite::IPayloadReader& payload) { if(proto.GetPayloadFormat() != NKikimrDataEvents::FORMAT_ARROW) { AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "invalid_payload_format")("payload_format", (ui64)proto.GetPayloadFormat()); diff --git a/ydb/core/tx/columnshard/operations/write_data.h b/ydb/core/tx/columnshard/operations/write_data.h index b4fcb5e68ef3..374cd060713b 100644 --- a/ydb/core/tx/columnshard/operations/write_data.h +++ b/ydb/core/tx/columnshard/operations/write_data.h @@ -18,7 +18,7 @@ class TArrowData : public NEvWrite::IDataContainer { : IndexSchema(schema) {} - bool Parse(const NKikimrDataEvents::TEvWrite::TOperation& proto, const NKikimr::NEvWrite::IPayloadData& payload); + bool Parse(const NKikimrDataEvents::TEvWrite::TOperation& proto, const NKikimr::NEvWrite::IPayloadReader& payload); virtual std::shared_ptr ExtractBatch() override; ui64 GetSchemaVersion() const override; ui64 GetSize() const override { diff --git a/ydb/core/tx/columnshard/operations/ya.make b/ydb/core/tx/columnshard/operations/ya.make index 200a78be1567..80dd03dd9b34 100644 --- a/ydb/core/tx/columnshard/operations/ya.make +++ b/ydb/core/tx/columnshard/operations/ya.make @@ -10,6 +10,7 @@ PEERDIR( ydb/core/protos ydb/core/tx/data_events ydb/services/metadata + ydb/core/tx/columnshard/data_sharing/destination/events ) END() diff --git a/ydb/core/tx/columnshard/resource_subscriber/actor.cpp 
b/ydb/core/tx/columnshard/resource_subscriber/actor.cpp index 91ed2f405b36..bd97ea30185a 100644 --- a/ydb/core/tx/columnshard/resource_subscriber/actor.cpp +++ b/ydb/core/tx/columnshard/resource_subscriber/actor.cpp @@ -13,38 +13,46 @@ class TCookie: public TThrRefBase { } }; -void TActor::Handle(TEvStartTask::TPtr& ev) { - Y_ABORT_UNLESS(!Aborted); - auto task = ev->Get()->GetTask(); - Y_ABORT_UNLESS(task); - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "ask_resources")("task", task->DebugString()); - Tasks.emplace(++Counter, task); - Send(NKikimr::NResourceBroker::MakeResourceBrokerID(), new NKikimr::NResourceBroker::TEvResourceBroker::TEvSubmitTask( - task->GetExternalTaskId(), - {{task->GetCPUAllocation(), task->GetMemoryAllocation()}}, - task->GetType(), - task->GetPriority(), - new TCookie(Counter) - )); - task->GetContext().GetCounters()->OnRequest(task->GetMemoryAllocation()); -} - -void TActor::Handle(NKikimr::NResourceBroker::TEvResourceBroker::TEvResourceAllocated::TPtr& ev) { - auto it = Tasks.find(((TCookie*)ev->Get()->Cookie.Get())->GetTaskIdentifier()); +void TActor::DoReplyAllocated(const ui64 internalTaskId, const ui64 rbTaskId) { + auto it = Tasks.find(internalTaskId); Y_ABORT_UNLESS(it != Tasks.end()); auto task = it->second; Tasks.erase(it); task->GetContext().GetCounters()->OnReply(task->GetMemoryAllocation()); if (Aborted) { - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "result_resources_on_abort")("task_id", ev->Get()->TaskId)("task", task->DebugString()); - std::make_unique(ev->Get()->TaskId, task->GetExternalTaskId(), *task, SelfId(), task->GetContext()); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "result_resources_on_abort")("task_id", rbTaskId)("task", task->DebugString()); + std::make_unique(rbTaskId, task->GetExternalTaskId(), *task, SelfId(), task->GetContext()); if (Tasks.empty()) { PassAway(); } } else { - AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "result_resources")("task_id", 
ev->Get()->TaskId)("task", task->DebugString()); - task->OnAllocationSuccess(ev->Get()->TaskId, SelfId()); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "result_resources")("task_id", rbTaskId)("task", task->DebugString()); + task->OnAllocationSuccess(rbTaskId, SelfId()); + } +} + +void TActor::Handle(TEvStartTask::TPtr& ev) { + Y_ABORT_UNLESS(!Aborted); + auto task = ev->Get()->GetTask(); + Y_ABORT_UNLESS(task); + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "ask_resources")("task", task->DebugString()); + Tasks.emplace(++Counter, task); + if (!task->GetCPUAllocation() && !task->GetMemoryAllocation()) { + DoReplyAllocated(Counter, 0); + } else { + Send(NKikimr::NResourceBroker::MakeResourceBrokerID(), new NKikimr::NResourceBroker::TEvResourceBroker::TEvSubmitTask( + task->GetExternalTaskId(), + {{task->GetCPUAllocation(), task->GetMemoryAllocation()}}, + task->GetType(), + task->GetPriority(), + new TCookie(Counter) + )); } + task->GetContext().GetCounters()->OnRequest(task->GetMemoryAllocation()); +} + +void TActor::Handle(NKikimr::NResourceBroker::TEvResourceBroker::TEvResourceAllocated::TPtr& ev) { + DoReplyAllocated(((TCookie*)ev->Get()->Cookie.Get())->GetTaskIdentifier(), ev->Get()->TaskId); } TActor::TActor(ui64 tabletId, const TActorId& parent) diff --git a/ydb/core/tx/columnshard/resource_subscriber/actor.h b/ydb/core/tx/columnshard/resource_subscriber/actor.h index 6b38d23c338d..030943880f75 100644 --- a/ydb/core/tx/columnshard/resource_subscriber/actor.h +++ b/ydb/core/tx/columnshard/resource_subscriber/actor.h @@ -22,6 +22,7 @@ class TActor: public TActorBootstrapped { } } + void DoReplyAllocated(const ui64 internalTaskId, const ui64 rbTaskId); public: static TAtomicCounter WaitingBlobsCount; TActor(ui64 tabletId, const TActorId& parent); diff --git a/ydb/core/tx/columnshard/resource_subscriber/task.cpp b/ydb/core/tx/columnshard/resource_subscriber/task.cpp index 7c836f5df898..d429c1e936c1 100644 --- 
a/ydb/core/tx/columnshard/resource_subscriber/task.cpp +++ b/ydb/core/tx/columnshard/resource_subscriber/task.cpp @@ -19,9 +19,11 @@ TResourcesGuard::~TResourcesGuard() { return; } AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "free_resources")("task_id", TaskId)("external_task_id", ExternalTaskId)("mem", Memory)("cpu", Cpu); - auto ev = std::make_unique(NKikimr::NResourceBroker::MakeResourceBrokerID(), Sender, new NKikimr::NResourceBroker::TEvResourceBroker::TEvFinishTask(TaskId)); - NActors::TActorContext::AsActorContext().Send(std::move(ev)); - Context.GetCounters()->GetBytesAllocated()->Remove(Memory); + if (TaskId) { + auto ev = std::make_unique(NKikimr::NResourceBroker::MakeResourceBrokerID(), Sender, new NKikimr::NResourceBroker::TEvResourceBroker::TEvFinishTask(TaskId)); + NActors::TActorContext::AsActorContext().Send(std::move(ev)); + Context.GetCounters()->GetBytesAllocated()->Remove(Memory); + } } TResourcesGuard::TResourcesGuard(const ui64 taskId, const TString& externalTaskId, const ITask& task, const NActors::TActorId& sender, const TTaskContext& context) @@ -31,18 +33,24 @@ TResourcesGuard::TResourcesGuard(const ui64 taskId, const TString& externalTaskI , Memory(task.GetMemoryAllocation()) , Cpu(task.GetCPUAllocation()) , Context(context) + , Priority(task.GetPriority()) { + AFL_VERIFY(taskId || (!Memory && !Cpu)); Context.GetCounters()->GetBytesAllocated()->Add(Memory); AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "allocate_resources")("external_task_id", ExternalTaskId)("task_id", TaskId)("mem", Memory)("cpu", Cpu); } void TResourcesGuard::Update(const ui64 memNew) { + if (!TaskId) { + return; + } + AFL_VERIFY(Memory); Context.GetCounters()->GetBytesAllocated()->Remove(Memory); AFL_VERIFY(NActors::TlsActivationContext); AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "update_resources")("task_id", TaskId)("external_task_id", ExternalTaskId)("mem", memNew)("cpu", Cpu)("mem_old", Memory); Memory = memNew; auto ev = 
std::make_unique(NKikimr::NResourceBroker::MakeResourceBrokerID(), Sender, new NKikimr::NResourceBroker::TEvResourceBroker::TEvUpdateTask(TaskId, {{Cpu, Memory}}, - Context.GetTypeName(), 1000)); + Context.GetTypeName(), Priority)); NActors::TActorContext::AsActorContext().Send(std::move(ev)); Context.GetCounters()->GetBytesAllocated()->Add(Memory); } diff --git a/ydb/core/tx/columnshard/resource_subscriber/task.h b/ydb/core/tx/columnshard/resource_subscriber/task.h index 0ea764acc404..df4b742f1ad9 100644 --- a/ydb/core/tx/columnshard/resource_subscriber/task.h +++ b/ydb/core/tx/columnshard/resource_subscriber/task.h @@ -26,6 +26,7 @@ class TResourcesGuard: public NColumnShard::TMonitoringObjectsCounter +#include + +namespace NKikimr::NOlap { + +TSimpleChunkMeta::TSimpleChunkMeta(const std::shared_ptr& column, const bool needMax, const bool isSortedColumn) { + Y_ABORT_UNLESS(column); + Y_ABORT_UNLESS(column->length()); + NumRows = column->length(); + RawBytes = NArrow::GetArrayDataSize(column); + + if (needMax) { + std::pair minMaxPos = {0, (column->length() - 1)}; + if (!isSortedColumn) { + minMaxPos = NArrow::FindMinMaxPosition(column); + Y_ABORT_UNLESS(minMaxPos.first >= 0); + Y_ABORT_UNLESS(minMaxPos.second >= 0); + } + + Max = NArrow::GetScalar(column, minMaxPos.second); + + Y_ABORT_UNLESS(Max); + } +} + +} diff --git a/ydb/core/tx/columnshard/splitter/abstract/chunk_meta.h b/ydb/core/tx/columnshard/splitter/abstract/chunk_meta.h new file mode 100644 index 000000000000..8f8f902e4095 --- /dev/null +++ b/ydb/core/tx/columnshard/splitter/abstract/chunk_meta.h @@ -0,0 +1,44 @@ +#pragma once +#include +#include + +#include +#include + +#include +#include + +namespace NKikimr::NOlap { + +class TSimpleChunkMeta { +protected: + std::shared_ptr Max; + ui32 NumRows = 0; + ui32 RawBytes = 0; + TSimpleChunkMeta() = default; +public: + TSimpleChunkMeta(const std::shared_ptr& column, const bool needMinMax, const bool isSortedColumn); + + ui64 GetMetadataSize() const { + 
return sizeof(ui32) + sizeof(ui32) + 8 * 3 * 2; + } + + std::shared_ptr GetMax() const { + return Max; + } + ui32 GetNumRows() const { + return NumRows; + } + ui32 GetRecordsCount() const { + return NumRows; + } + ui32 GetRawBytes() const { + return RawBytes; + } + + bool HasMax() const noexcept { + return Max.get(); + } + +}; +} diff --git a/ydb/core/tx/columnshard/blobs_action/memory.cpp b/ydb/core/tx/columnshard/splitter/abstract/chunks.cpp similarity index 60% rename from ydb/core/tx/columnshard/blobs_action/memory.cpp rename to ydb/core/tx/columnshard/splitter/abstract/chunks.cpp index 66ff21fc5004..05c8240f8afb 100644 --- a/ydb/core/tx/columnshard/blobs_action/memory.cpp +++ b/ydb/core/tx/columnshard/splitter/abstract/chunks.cpp @@ -1,4 +1,4 @@ -#include "memory.h" +#include "chunks.h" namespace NKikimr::NOlap { diff --git a/ydb/core/tx/columnshard/splitter/abstract/chunks.h b/ydb/core/tx/columnshard/splitter/abstract/chunks.h new file mode 100644 index 000000000000..e873ff80560e --- /dev/null +++ b/ydb/core/tx/columnshard/splitter/abstract/chunks.h @@ -0,0 +1,125 @@ +#pragma once +#include +#include + +#include + +#include + +namespace NKikimr::NColumnShard { +class TSplitterCounters; +} + +namespace NKikimr::NOlap { + +class TPortionInfo; +class TSimpleColumnInfo; +class TColumnSaver; + +class IPortionDataChunk { +private: + YDB_READONLY(ui32, EntityId, 0); + + std::optional ChunkIdx; + +protected: + ui64 DoGetPackedSize() const { + return GetData().size(); + } + virtual const TString& DoGetData() const = 0; + virtual TString DoDebugString() const = 0; + virtual std::vector> DoInternalSplit(const TColumnSaver& saver, const std::shared_ptr& counters, const std::vector& splitSizes) const = 0; + virtual bool DoIsSplittable() const = 0; + virtual std::optional DoGetRecordsCount() const = 0; + virtual std::shared_ptr DoGetFirstScalar() const = 0; + virtual std::shared_ptr DoGetLastScalar() const = 0; + virtual void DoAddIntoPortionBeforeBlob(const 
TBlobRangeLink16& bRange, TPortionInfo& portionInfo) const = 0; + virtual std::shared_ptr DoCopyWithAnotherBlob(TString&& /*data*/, const TSimpleColumnInfo& /*columnInfo*/) const { + AFL_VERIFY(false); + return nullptr; + } +public: + IPortionDataChunk(const ui32 entityId, const std::optional& chunkIdx = {}) + : EntityId(entityId) + , ChunkIdx(chunkIdx) { + } + + virtual ~IPortionDataChunk() = default; + + TString DebugString() const { + return DoDebugString(); + } + + const TString& GetData() const { + return DoGetData(); + } + + ui64 GetPackedSize() const { + return DoGetPackedSize(); + } + + std::optional GetRecordsCount() const { + return DoGetRecordsCount(); + } + + ui32 GetRecordsCountVerified() const { + auto result = DoGetRecordsCount(); + AFL_VERIFY(result); + return *result; + } + + std::vector> InternalSplit(const TColumnSaver& saver, const std::shared_ptr& counters, const std::vector& splitSizes) const { + return DoInternalSplit(saver, counters, splitSizes); + } + + bool IsSplittable() const { + return DoIsSplittable(); + } + + ui16 GetChunkIdxVerified() const { + AFL_VERIFY(!!ChunkIdx); + return *ChunkIdx; + } + + std::optional GetChunkIdxOptional() const { + return ChunkIdx; + } + + void SetChunkIdx(const ui16 value) { + ChunkIdx = value; + } + + std::shared_ptr CopyWithAnotherBlob(TString&& data, const TSimpleColumnInfo& columnInfo) const { + return DoCopyWithAnotherBlob(std::move(data), columnInfo); + } + + std::shared_ptr GetFirstScalar() const { + auto result = DoGetFirstScalar(); + AFL_VERIFY(result); + return result; + } + std::shared_ptr GetLastScalar() const { + auto result = DoGetLastScalar(); + AFL_VERIFY(result); + return result; + } + + TChunkAddress GetChunkAddressVerified() const { + return TChunkAddress(GetEntityId(), GetChunkIdxVerified()); + } + + std::optional GetChunkAddressOptional() const { + if (ChunkIdx) { + return TChunkAddress(GetEntityId(), GetChunkIdxVerified()); + } else { + return {}; + } + } + + void 
AddIntoPortionBeforeBlob(const TBlobRangeLink16& bRange, TPortionInfo& portionInfo) const { + AFL_VERIFY(!bRange.IsValid()); + return DoAddIntoPortionBeforeBlob(bRange, portionInfo); + } +}; + +} diff --git a/ydb/core/tx/columnshard/splitter/abstract/ya.make b/ydb/core/tx/columnshard/splitter/abstract/ya.make new file mode 100644 index 000000000000..82886451c221 --- /dev/null +++ b/ydb/core/tx/columnshard/splitter/abstract/ya.make @@ -0,0 +1,13 @@ +LIBRARY() + +SRCS( + chunks.cpp + chunk_meta.cpp +) + +PEERDIR( + ydb/core/tx/columnshard/engines/scheme/abstract + ydb/core/tx/columnshard/common +) + +END() diff --git a/ydb/core/tx/columnshard/splitter/batch_slice.cpp b/ydb/core/tx/columnshard/splitter/batch_slice.cpp index 47dfe990eb11..83f9f90f77f2 100644 --- a/ydb/core/tx/columnshard/splitter/batch_slice.cpp +++ b/ydb/core/tx/columnshard/splitter/batch_slice.cpp @@ -4,69 +4,123 @@ namespace NKikimr::NOlap { -bool TGeneralSerializedSlice::GroupBlobs(std::vector& blobs) { - std::vector> chunksInProgress; +class TChunksToSplit { +private: + YDB_READONLY_DEF(std::vector>, Chunks); + i64 FullSize = 0; +public: + ui64 GetFullSize() const { + return FullSize; + } + + ui32 size() const { + return Chunks.size(); + } + + void Clear() { + Chunks.clear(); + FullSize = 0; + } + + const std::shared_ptr& operator[](const ui32 index) const { + AFL_VERIFY(index < Chunks.size()); + return Chunks[index]; + } + + void AddChunks(const std::vector>& chunks) { + for (auto&& i : chunks) { + FullSize += i->GetPackedSize(); + Chunks.emplace_back(i); + } + } + + void PopFront(const ui32 count) { + AFL_VERIFY(count <= Chunks.size()); + for (ui32 i = 0; i < count; ++i) { + FullSize -= Chunks[i]->GetPackedSize(); + } + AFL_VERIFY(FullSize >= 0); + Chunks.erase(Chunks.begin(), Chunks.begin() + count); + } + + void Exchange(const ui32 index, std::vector>&& newChunks) { + AFL_VERIFY(index < Chunks.size()); + FullSize -= Chunks[index]->GetPackedSize(); + AFL_VERIFY(FullSize >= 0); + for (auto&& i : 
newChunks) { + FullSize += i->GetPackedSize(); + } + Chunks.erase(Chunks.begin() + index); + Chunks.insert(Chunks.begin() + index, newChunks.begin(), newChunks.end()); + } + + bool IsEmpty() { + return Chunks.empty(); + } +}; + +bool TGeneralSerializedSlice::GroupBlobsImpl(const NSplitter::TGroupFeatures& features, std::vector& blobs) { + TChunksToSplit chunksInProgress; std::sort(Data.begin(), Data.end()); for (auto&& i : Data) { - for (auto&& p : i.GetChunks()) { - chunksInProgress.emplace_back(p); + if (!features.Contains(i.GetEntityId())) { + continue; } + chunksInProgress.AddChunks(i.GetChunks()); } + InternalSplitsCount = 0; + AFL_VERIFY(chunksInProgress.size()); std::vector result; - Y_ABORT_UNLESS(Settings.GetMaxBlobSize() >= 2 * Settings.GetMinBlobSize()); - while (chunksInProgress.size()) { - i64 fullSize = 0; - for (auto&& i : chunksInProgress) { - fullSize += i->GetPackedSize(); - } - if (fullSize < Settings.GetMaxBlobSize()) { - result.emplace_back(TSplittedBlob()); - for (auto&& i : chunksInProgress) { - result.back().Take(i); - } - chunksInProgress.clear(); - break; - } + Y_ABORT_UNLESS(features.GetSplitSettings().GetMaxBlobSize() >= 2 * features.GetSplitSettings().GetMinBlobSize()); + while (!chunksInProgress.IsEmpty()) { bool hasNoSplitChanges = true; while (hasNoSplitChanges) { + if (chunksInProgress.GetFullSize() < (ui64)features.GetSplitSettings().GetMaxBlobSize()) { + result.emplace_back(TSplittedBlob(features.GetName())); + for (auto&& i : chunksInProgress.GetChunks()) { + result.back().Take(i); + } + chunksInProgress.Clear(); + break; + } hasNoSplitChanges = false; i64 partSize = 0; for (ui32 i = 0; i < chunksInProgress.size(); ++i) { const i64 nextPartSize = partSize + chunksInProgress[i]->GetPackedSize(); - const i64 nextOtherSize = fullSize - nextPartSize; - const i64 otherSize = fullSize - partSize; - if (nextPartSize >= Settings.GetMaxBlobSize() || nextOtherSize < Settings.GetMinBlobSize()) { - Y_ABORT_UNLESS(otherSize >= 
Settings.GetMinBlobSize()); - Y_ABORT_UNLESS(partSize < Settings.GetMaxBlobSize()); - if (partSize >= Settings.GetMinBlobSize()) { - result.emplace_back(TSplittedBlob()); + const i64 nextOtherSize = chunksInProgress.GetFullSize() - nextPartSize; + const i64 otherSize = chunksInProgress.GetFullSize() - partSize; + if (nextPartSize >= features.GetSplitSettings().GetMaxBlobSize() || nextOtherSize < features.GetSplitSettings().GetMinBlobSize()) { + Y_ABORT_UNLESS(otherSize >= features.GetSplitSettings().GetMinBlobSize()); + Y_ABORT_UNLESS(partSize < features.GetSplitSettings().GetMaxBlobSize()); + if (partSize >= features.GetSplitSettings().GetMinBlobSize()) { + result.emplace_back(TSplittedBlob(features.GetName())); for (ui32 chunk = 0; chunk < i; ++chunk) { result.back().Take(chunksInProgress[chunk]); } Counters->BySizeSplitter.OnCorrectSerialized(result.back().GetSize()); - chunksInProgress.erase(chunksInProgress.begin(), chunksInProgress.begin() + i); + chunksInProgress.PopFront(i); hasNoSplitChanges = true; } else { - Y_ABORT_UNLESS((i64)chunksInProgress[i]->GetPackedSize() > Settings.GetMinBlobSize() - partSize); - Y_ABORT_UNLESS(otherSize - (Settings.GetMinBlobSize() - partSize) >= Settings.GetMinBlobSize()); + Y_ABORT_UNLESS((i64)chunksInProgress[i]->GetPackedSize() > features.GetSplitSettings().GetMinBlobSize() - partSize); + Y_ABORT_UNLESS(otherSize - (features.GetSplitSettings().GetMinBlobSize() - partSize) >= features.GetSplitSettings().GetMinBlobSize()); std::vector> newChunks; const bool splittable = chunksInProgress[i]->IsSplittable(); if (splittable) { Counters->BySizeSplitter.OnTrashSerialized(chunksInProgress[i]->GetPackedSize()); - const std::vector sizes = {(ui64)(Settings.GetMinBlobSize() - partSize)}; + const std::vector sizes = {(ui64)(features.GetSplitSettings().GetMinBlobSize() - partSize)}; newChunks = chunksInProgress[i]->InternalSplit(Schema->GetColumnSaver(chunksInProgress[i]->GetEntityId()), Counters, sizes); - 
chunksInProgress.erase(chunksInProgress.begin() + i); - chunksInProgress.insert(chunksInProgress.begin() + i, newChunks.begin(), newChunks.end()); + ++InternalSplitsCount; + chunksInProgress.Exchange(i, std::move(newChunks)); } - TSplittedBlob newBlob; + TSplittedBlob newBlob(features.GetName()); for (ui32 chunk = 0; chunk <= i; ++chunk) { newBlob.Take(chunksInProgress[chunk]); } - AFL_VERIFY(splittable || newBlob.GetSize() < Settings.GetMaxBlobSize())("splittable", splittable)("blob_size", newBlob.GetSize())("max", Settings.GetMaxBlobSize()); - if (newBlob.GetSize() < Settings.GetMaxBlobSize()) { - chunksInProgress.erase(chunksInProgress.begin(), chunksInProgress.begin() + i + 1); + AFL_VERIFY(splittable || newBlob.GetSize() < features.GetSplitSettings().GetMaxBlobSize())("splittable", splittable)("blob_size", newBlob.GetSize())("max", features.GetSplitSettings().GetMaxBlobSize()); + if (newBlob.GetSize() < features.GetSplitSettings().GetMaxBlobSize()) { + chunksInProgress.PopFront(i + 1); result.emplace_back(std::move(newBlob)); Counters->BySizeSplitter.OnCorrectSerialized(result.back().GetSize()); } @@ -95,11 +149,9 @@ bool TGeneralSerializedSlice::GroupBlobs(std::vector& blobs) { return true; } -TGeneralSerializedSlice::TGeneralSerializedSlice(const std::map>>& data, ISchemaDetailInfo::TPtr schema, std::shared_ptr counters, - const TSplitSettings& settings) +TGeneralSerializedSlice::TGeneralSerializedSlice(const THashMap>>& data, ISchemaDetailInfo::TPtr schema, std::shared_ptr counters) : Schema(schema) - , Counters(counters) - , Settings(settings) { + , Counters(counters) { std::optional recordsCount; for (auto&& [entityId, chunks] : data) { TSplittedEntity entity(entityId); @@ -118,16 +170,16 @@ TGeneralSerializedSlice::TGeneralSerializedSlice(const std::map counters, const TSplitSettings& settings) +TGeneralSerializedSlice::TGeneralSerializedSlice(const ui32 recordsCount, ISchemaDetailInfo::TPtr schema, std::shared_ptr counters) : RecordsCount(recordsCount) 
, Schema(schema) , Counters(counters) - , Settings(settings) { } -TBatchSerializedSlice::TBatchSerializedSlice(const std::shared_ptr& batch, ISchemaDetailInfo::TPtr schema, std::shared_ptr counters, const TSplitSettings& settings) - : TBase(TValidator::CheckNotNull(batch)->num_rows(), schema, counters, settings) +TBatchSerializedSlice::TBatchSerializedSlice(const std::shared_ptr& batch, ISchemaDetailInfo::TPtr schema, std::shared_ptr counters, + const NSplitter::TSplitSettings& settings) + : TBase(TValidator::CheckNotNull(batch)->num_rows(), schema, counters) , Batch(batch) { Y_ABORT_UNLESS(batch); @@ -145,7 +197,7 @@ TBatchSerializedSlice::TBatchSerializedSlice(const std::shared_ptr> chunks; - for (auto&& i : splitter.Split(i, Schema->GetField(c.GetEntityId()), Settings.GetMaxBlobSize())) { + for (auto&& i : splitter.Split(i, Schema->GetField(c.GetEntityId()), settings.GetMaxBlobSize())) { chunks.emplace_back(std::make_shared(c.GetEntityId(), i, Schema)); } c.SetChunks(chunks); @@ -154,6 +206,24 @@ TBatchSerializedSlice::TBatchSerializedSlice(const std::shared_ptr TBatchSerializedSlice::BuildSimpleSlices(const std::shared_ptr& batch, const NSplitter::TSplitSettings& settings, const std::shared_ptr& counters, const ISchemaDetailInfo::TPtr& schemaInfo) { + std::vector slices; + auto stats = schemaInfo->GetBatchSerializationStats(batch); + ui32 recordsCount = settings.GetMinRecordsCount(); + if (stats) { + const ui32 recordsCountForMinSize = stats->PredictOptimalPackRecordsCount(batch->num_rows(), settings.GetMinBlobSize()).value_or(recordsCount); + const ui32 recordsCountForMaxPortionSize = stats->PredictOptimalPackRecordsCount(batch->num_rows(), settings.GetMaxPortionSize()).value_or(recordsCount); + recordsCount = std::min(recordsCountForMaxPortionSize, std::max(recordsCount, recordsCountForMinSize)); + } + auto linearSplitInfo = TSimpleSplitter::GetOptimalLinearSplitting(batch->num_rows(), recordsCount); + for (auto it = linearSplitInfo.StartIterator(); 
it.IsValid(); it.Next()) { + std::shared_ptr current = batch->Slice(it.GetPosition(), it.GetCurrentPackSize()); + TBatchSerializedSlice slice(current, schemaInfo, counters, settings); + slices.emplace_back(std::move(slice)); + } + return slices; +} + void TGeneralSerializedSlice::MergeSlice(TGeneralSerializedSlice&& slice) { Y_ABORT_UNLESS(Data.size() == slice.Data.size()); RecordsCount += slice.GetRecordsCount(); @@ -163,4 +233,21 @@ void TGeneralSerializedSlice::MergeSlice(TGeneralSerializedSlice&& slice) { } } +bool TGeneralSerializedSlice::GroupBlobs(std::vector& blobs, const NSplitter::TEntityGroups& groups) { + if (groups.IsEmpty()) { + return GroupBlobsImpl(groups.GetDefaultGroupFeatures(), blobs); + } else { + std::vector result; + for (auto&& i : groups) { + std::vector blobsLocal; + if (!GroupBlobsImpl(i.second, blobsLocal)) { + return false; + } + result.insert(result.end(), blobsLocal.begin(), blobsLocal.end()); + } + std::swap(result, blobs); + return true; + } +} + } diff --git a/ydb/core/tx/columnshard/splitter/batch_slice.h b/ydb/core/tx/columnshard/splitter/batch_slice.h index 0c0aca979feb..acbb9a0414e4 100644 --- a/ydb/core/tx/columnshard/splitter/batch_slice.h +++ b/ydb/core/tx/columnshard/splitter/batch_slice.h @@ -4,60 +4,27 @@ #include "scheme_info.h" #include "column_info.h" #include "blob_info.h" +#include "similar_packer.h" #include #include #include -#include +#include #include namespace NKikimr::NOlap { -template -class TArrayView { -private: - typename TContainer::iterator Begin; - typename TContainer::iterator End; -public: - TArrayView(typename TContainer::iterator itBegin, typename TContainer::iterator itEnd) - : Begin(itBegin) - , End(itEnd) { - - } - - typename TContainer::iterator begin() { - return Begin; - } - - typename TContainer::iterator end() { - return End; - } - - typename TContainer::value_type& front() { - return *Begin; - } - - typename TContainer::value_type& operator[](const size_t index) { - return *(Begin + index); 
- } - - size_t size() { - return End - Begin; - } -}; - -template -using TVectorView = TArrayView>; - class TDefaultSchemaDetails: public ISchemaDetailInfo { private: ISnapshotSchema::TPtr Schema; - const TSaverContext Context; std::shared_ptr Stats; +protected: + virtual TColumnSaver DoGetColumnSaver(const ui32 columnId) const override { + return Schema->GetColumnSaver(columnId); + } public: - TDefaultSchemaDetails(ISnapshotSchema::TPtr schema, const TSaverContext& context, const std::shared_ptr& stats) + TDefaultSchemaDetails(ISnapshotSchema::TPtr schema, const std::shared_ptr& stats) : Schema(schema) - , Context(context) , Stats(stats) { AFL_VERIFY(Stats); @@ -85,20 +52,17 @@ class TDefaultSchemaDetails: public ISchemaDetailInfo { virtual ui32 GetColumnId(const std::string& fieldName) const override { return Schema->GetColumnId(fieldName); } - virtual TColumnSaver GetColumnSaver(const ui32 columnId) const override { - return Schema->GetColumnSaver(columnId, Context); - } }; class TGeneralSerializedSlice { private: YDB_READONLY(ui32, RecordsCount, 0); + YDB_READONLY(ui32, InternalSplitsCount, 0); protected: std::vector Data; ui64 Size = 0; ISchemaDetailInfo::TPtr Schema; std::shared_ptr Counters; - TSplitSettings Settings; TGeneralSerializedSlice() = default; const TSplittedEntity& GetEntityDataVerified(const ui32& entityId) const { @@ -110,10 +74,19 @@ class TGeneralSerializedSlice { Y_ABORT_UNLESS(false); return Data.front(); } + bool GroupBlobsImpl(const NSplitter::TGroupFeatures& features, std::vector& blobs); public: - std::map>> GetPortionChunks() const { + THashMap>> GetPortionChunksToHash() const { + THashMap>> result; + for (auto&& i : Data) { + AFL_VERIFY(result.emplace(i.GetEntityId(), i.GetChunks()).second); + } + return result; + } + + std::map>> GetPortionChunksToMap() const { std::map>> result; for (auto&& i : Data) { AFL_VERIFY(result.emplace(i.GetEntityId(), i.GetChunks()).second); @@ -137,14 +110,10 @@ class TGeneralSerializedSlice { return 
Size; } - std::vector>> GroupChunksByBlobs() { - std::vector>> result; + std::vector GroupChunksByBlobs(const NSplitter::TEntityGroups& groups) { std::vector blobs; - GroupBlobs(blobs); - for (auto&& i : blobs) { - result.emplace_back(i.GetChunks()); - } - return result; + AFL_VERIFY(GroupBlobs(blobs, groups)); + return blobs; } explicit TGeneralSerializedSlice(TVectorView&& objects) { @@ -154,12 +123,12 @@ class TGeneralSerializedSlice { MergeSlice(std::move(objects[i])); } } - TGeneralSerializedSlice(const std::map>>& data, ISchemaDetailInfo::TPtr schema, std::shared_ptr counters, const TSplitSettings& settings); - TGeneralSerializedSlice(const ui32 recordsCount, ISchemaDetailInfo::TPtr schema, std::shared_ptr counters, const TSplitSettings& settings); + TGeneralSerializedSlice(const THashMap>>& data, ISchemaDetailInfo::TPtr schema, std::shared_ptr counters); + TGeneralSerializedSlice(const ui32 recordsCount, ISchemaDetailInfo::TPtr schema, std::shared_ptr counters); void MergeSlice(TGeneralSerializedSlice&& slice); - bool GroupBlobs(std::vector& blobs); + bool GroupBlobs(std::vector& blobs, const NSplitter::TEntityGroups& groups); bool operator<(const TGeneralSerializedSlice& item) const { return Size < item.Size; @@ -171,7 +140,7 @@ class TBatchSerializedSlice: public TGeneralSerializedSlice { using TBase = TGeneralSerializedSlice; YDB_READONLY_DEF(std::shared_ptr, Batch); public: - TBatchSerializedSlice(const std::shared_ptr& batch, ISchemaDetailInfo::TPtr schema, std::shared_ptr counters, const TSplitSettings& settings); + TBatchSerializedSlice(const std::shared_ptr& batch, ISchemaDetailInfo::TPtr schema, std::shared_ptr counters, const NSplitter::TSplitSettings& settings); explicit TBatchSerializedSlice(TVectorView&& objects) { Y_ABORT_UNLESS(objects.size()); @@ -184,6 +153,10 @@ class TBatchSerializedSlice: public TGeneralSerializedSlice { Batch = NArrow::CombineBatches({Batch, slice.Batch}); TBase::MergeSlice(std::move(slice)); } + + static std::vector 
BuildSimpleSlices(const std::shared_ptr& batch, const NSplitter::TSplitSettings& settings, + const std::shared_ptr& counters, const ISchemaDetailInfo::TPtr& schemaInfo); + }; } diff --git a/ydb/core/tx/columnshard/splitter/blob_info.h b/ydb/core/tx/columnshard/splitter/blob_info.h index a4515f9f7849..d96b55fd0f9c 100644 --- a/ydb/core/tx/columnshard/splitter/blob_info.h +++ b/ydb/core/tx/columnshard/splitter/blob_info.h @@ -6,10 +6,17 @@ namespace NKikimr::NOlap { class TSplittedBlob { private: + YDB_READONLY_DEF(TString, GroupName); YDB_READONLY(i64, Size, 0); YDB_READONLY_DEF(std::vector>, Chunks); public: + TSplittedBlob(const TString& groupName) + : GroupName(groupName) + { + + } + void Take(const std::shared_ptr& chunk); bool operator<(const TSplittedBlob& item) const { return Size > item.Size; diff --git a/ydb/core/tx/columnshard/splitter/chunk_meta.cpp b/ydb/core/tx/columnshard/splitter/chunk_meta.cpp index 646a458638dd..3d41d9b0b6c6 100644 --- a/ydb/core/tx/columnshard/splitter/chunk_meta.cpp +++ b/ydb/core/tx/columnshard/splitter/chunk_meta.cpp @@ -1,27 +1 @@ #include "chunk_meta.h" -#include -#include - -namespace NKikimr::NOlap { - -TSimpleChunkMeta::TSimpleChunkMeta(const std::shared_ptr& column, const bool needMax, const bool isSortedColumn) { - Y_ABORT_UNLESS(column); - Y_ABORT_UNLESS(column->length()); - NumRows = column->length(); - RawBytes = NArrow::GetArrayDataSize(column); - - if (needMax) { - std::pair minMaxPos = {0, (column->length() - 1)}; - if (!isSortedColumn) { - minMaxPos = NArrow::FindMinMaxPosition(column); - Y_ABORT_UNLESS(minMaxPos.first >= 0); - Y_ABORT_UNLESS(minMaxPos.second >= 0); - } - - Max = NArrow::GetScalar(column, minMaxPos.second); - - Y_ABORT_UNLESS(Max); - } -} - -} diff --git a/ydb/core/tx/columnshard/splitter/chunk_meta.h b/ydb/core/tx/columnshard/splitter/chunk_meta.h index 4f367dde2f2f..874531d3537d 100644 --- a/ydb/core/tx/columnshard/splitter/chunk_meta.h +++ b/ydb/core/tx/columnshard/splitter/chunk_meta.h @@ -1,53 
+1,2 @@ #pragma once -#include -#include - -#include -#include - -#include -#include - -namespace NKikimr::NOlap { - -class TSimpleChunkMeta { -protected: - std::shared_ptr Max; - std::optional NumRows; - std::optional RawBytes; - TSimpleChunkMeta() = default; -public: - TSimpleChunkMeta(const std::shared_ptr& column, const bool needMinMax, const bool isSortedColumn); - - - ui64 GetMetadataSize() const { - return sizeof(ui32) + sizeof(ui32) + 8 * 3 * 2; - } - - std::shared_ptr GetMax() const { - return Max; - } - std::optional GetNumRows() const { - return NumRows; - - } - std::optional GetRawBytes() const { - return RawBytes; - } - - ui32 GetNumRowsVerified() const { - Y_ABORT_UNLESS(NumRows); - return *NumRows; - } - - ui32 GetRawBytesVerified() const { - Y_ABORT_UNLESS(RawBytes); - return *RawBytes; - } - - bool HasMax() const noexcept { - return Max.get(); - } - -}; -} +#include "abstract/chunk_meta.h" diff --git a/ydb/core/tx/columnshard/splitter/chunks.cpp b/ydb/core/tx/columnshard/splitter/chunks.cpp index 0bf0b70fe6d6..c00c3f43e61b 100644 --- a/ydb/core/tx/columnshard/splitter/chunks.cpp +++ b/ydb/core/tx/columnshard/splitter/chunks.cpp @@ -24,8 +24,9 @@ std::vector> IPortionColumnChunk::DoInternalS return result; } -void IPortionColumnChunk::DoAddIntoPortion(const TBlobRange& bRange, TPortionInfo& portionInfo) const { - TColumnRecord rec(GetChunkAddress(), bRange, BuildSimpleChunkMeta()); +void IPortionColumnChunk::DoAddIntoPortionBeforeBlob(const TBlobRangeLink16& bRange, TPortionInfo& portionInfo) const { + AFL_VERIFY(!bRange.IsValid()); + TColumnRecord rec(GetChunkAddressVerified(), bRange, BuildSimpleChunkMeta()); portionInfo.AppendOneChunkColumn(std::move(rec)); } diff --git a/ydb/core/tx/columnshard/splitter/chunks.h b/ydb/core/tx/columnshard/splitter/chunks.h index 73f88f15e5aa..bd7d6c80efdb 100644 --- a/ydb/core/tx/columnshard/splitter/chunks.h +++ b/ydb/core/tx/columnshard/splitter/chunks.h @@ -1,5 +1,6 @@ #pragma once -#include "chunk_meta.h" 
+#include "abstract/chunk_meta.h" +#include "abstract/chunks.h" #include #include @@ -7,91 +8,6 @@ namespace NKikimr::NOlap { -class IPortionDataChunk { -private: - YDB_READONLY(ui32, EntityId, 0); - - std::optional ChunkIdx; - -protected: - ui64 DoGetPackedSize() const { - return GetData().size(); - } - virtual const TString& DoGetData() const = 0; - virtual TString DoDebugString() const = 0; - virtual std::vector> DoInternalSplit(const TColumnSaver& saver, const std::shared_ptr& counters, const std::vector& splitSizes) const = 0; - virtual bool DoIsSplittable() const = 0; - virtual std::optional DoGetRecordsCount() const = 0; - virtual std::shared_ptr DoGetFirstScalar() const = 0; - virtual std::shared_ptr DoGetLastScalar() const = 0; - virtual void DoAddIntoPortion(const TBlobRange& bRange, TPortionInfo& portionInfo) const = 0; -public: - IPortionDataChunk(const ui32 entityId, const std::optional& chunkIdx = {}) - : EntityId(entityId) - , ChunkIdx(chunkIdx) { - } - - virtual ~IPortionDataChunk() = default; - - TString DebugString() const { - return DoDebugString(); - } - - const TString& GetData() const { - return DoGetData(); - } - - ui64 GetPackedSize() const { - return DoGetPackedSize(); - } - - std::optional GetRecordsCount() const { - return DoGetRecordsCount(); - } - - ui32 GetRecordsCountVerified() const { - auto result = DoGetRecordsCount(); - AFL_VERIFY(result); - return *result; - } - - std::vector> InternalSplit(const TColumnSaver& saver, const std::shared_ptr& counters, const std::vector& splitSizes) const { - return DoInternalSplit(saver, counters, splitSizes); - } - - bool IsSplittable() const { - return DoIsSplittable(); - } - - ui16 GetChunkIdx() const { - AFL_VERIFY(!!ChunkIdx); - return *ChunkIdx; - } - - void SetChunkIdx(const ui16 value) { - ChunkIdx = value; - } - - std::shared_ptr GetFirstScalar() const { - auto result = DoGetFirstScalar(); - Y_ABORT_UNLESS(result); - return result; - } - std::shared_ptr GetLastScalar() const { - auto 
result = DoGetLastScalar(); - Y_ABORT_UNLESS(result); - return result; - } - - TChunkAddress GetChunkAddress() const { - return TChunkAddress(GetEntityId(), GetChunkIdx()); - } - - void AddIntoPortion(const TBlobRange& bRange, TPortionInfo& portionInfo) const { - return DoAddIntoPortion(bRange, portionInfo); - } -}; - class IPortionColumnChunk : public IPortionDataChunk { private: using TBase = IPortionDataChunk; @@ -103,7 +19,7 @@ class IPortionColumnChunk : public IPortionDataChunk { return DoGetRecordsCountImpl(); } - virtual void DoAddIntoPortion(const TBlobRange& bRange, TPortionInfo& portionInfo) const override; + virtual void DoAddIntoPortionBeforeBlob(const TBlobRangeLink16& bRange, TPortionInfo& portionInfo) const override; virtual std::vector> DoInternalSplitImpl(const TColumnSaver& saver, const std::shared_ptr& counters, const std::vector& splitSizes) const = 0; virtual std::vector> DoInternalSplit(const TColumnSaver& saver, const std::shared_ptr& counters, const std::vector& splitSizes) const override; diff --git a/ydb/core/tx/columnshard/splitter/rb_splitter.cpp b/ydb/core/tx/columnshard/splitter/rb_splitter.cpp deleted file mode 100644 index 698a9e6f3e4a..000000000000 --- a/ydb/core/tx/columnshard/splitter/rb_splitter.cpp +++ /dev/null @@ -1,71 +0,0 @@ -#include "rb_splitter.h" -#include "simple.h" - -namespace NKikimr::NOlap { - -TRBSplitLimiter::TRBSplitLimiter(std::shared_ptr counters, ISchemaDetailInfo::TPtr schemaInfo, - const std::shared_ptr batch, const TSplitSettings& settings) - : Counters(counters) - , Batch(batch) - , Settings(settings) -{ - Y_ABORT_UNLESS(Batch->num_rows()); - std::vector slices = BuildSimpleSlices(Batch, Settings, Counters, schemaInfo); - - auto chunks = TSimilarSlicer(Settings.GetMinBlobSize()).Split(slices); - ui32 chunkStartPosition = 0; - for (auto&& spanObjects : chunks) { - Slices.emplace_back(TBatchSerializedSlice(std::move(spanObjects))); - chunkStartPosition += spanObjects.size(); - } - 
Y_ABORT_UNLESS(chunkStartPosition == slices.size()); - ui32 recordsCountCheck = 0; - for (auto&& i : Slices) { - recordsCountCheck += i.GetRecordsCount(); - } - Y_ABORT_UNLESS(recordsCountCheck == batch->num_rows()); -} - -bool TRBSplitLimiter::Next(std::vector>>& portionBlobs, std::shared_ptr& batch) { - if (!Slices.size()) { - return false; - } - std::vector blobs; - Slices.front().GroupBlobs(blobs); - std::vector>> result; - std::map columnChunks; - for (auto&& i : blobs) { - if (blobs.size() == 1) { - Counters->MonoBlobs.OnBlobData(i.GetSize()); - } else { - Counters->SplittedBlobs.OnBlobData(i.GetSize()); - } - result.emplace_back(i.GetChunks()); - } - std::swap(result, portionBlobs); - batch = Slices.front().GetBatch(); - Slices.pop_front(); - return true; -} - -std::vector TRBSplitLimiter::BuildSimpleSlices(const std::shared_ptr& batch, const TSplitSettings& settings, - const std::shared_ptr& counters, const ISchemaDetailInfo::TPtr& schemaInfo) -{ - std::vector slices; - auto stats = schemaInfo->GetBatchSerializationStats(batch); - ui32 recordsCount = settings.GetMinRecordsCount(); - if (stats) { - const ui32 recordsCountForMinSize = stats->PredictOptimalPackRecordsCount(batch->num_rows(), settings.GetMinBlobSize()).value_or(recordsCount); - const ui32 recordsCountForMaxPortionSize = stats->PredictOptimalPackRecordsCount(batch->num_rows(), settings.GetMaxPortionSize()).value_or(recordsCount); - recordsCount = std::min(recordsCountForMaxPortionSize, std::max(recordsCount, recordsCountForMinSize)); - } - auto linearSplitInfo = TSimpleSplitter::GetOptimalLinearSplitting(batch->num_rows(), recordsCount); - for (auto it = linearSplitInfo.StartIterator(); it.IsValid(); it.Next()) { - std::shared_ptr current = batch->Slice(it.GetPosition(), it.GetCurrentPackSize()); - TBatchSerializedSlice slice(current, schemaInfo, counters, settings); - slices.emplace_back(std::move(slice)); - } - return slices; -} - -} diff --git a/ydb/core/tx/columnshard/splitter/rb_splitter.h 
b/ydb/core/tx/columnshard/splitter/rb_splitter.h deleted file mode 100644 index e5d25f2dcef9..000000000000 --- a/ydb/core/tx/columnshard/splitter/rb_splitter.h +++ /dev/null @@ -1,69 +0,0 @@ -#pragma once -#include "batch_slice.h" -#include "stats.h" -#include -#include -#include -#include - -#include - -namespace NKikimr::NOlap { - -class TSimilarSlicer { -private: - const ui64 BottomLimit = 0; -public: - TSimilarSlicer(const ui64 bottomLimit) - : BottomLimit(bottomLimit) { - - } - - template - std::vector> Split(std::vector& objects) { - ui64 fullSize = 0; - for (auto&& i : objects) { - fullSize += i.GetSize(); - } - if (fullSize <= BottomLimit) { - return {TVectorView(objects.begin(), objects.end())}; - } - ui64 currentSize = 0; - ui64 currentStart = 0; - std::vector> result; - for (ui32 i = 0; i < objects.size(); ++i) { - const ui64 nextSize = currentSize + objects[i].GetSize(); - const ui64 nextOtherSize = fullSize - nextSize; - if ((nextSize >= BottomLimit && nextOtherSize >= BottomLimit) || (i + 1 == objects.size())) { - result.emplace_back(TVectorView(objects.begin() + currentStart, objects.begin() + i + 1)); - currentSize = 0; - currentStart = i + 1; - } else { - currentSize = nextSize; - } - } - return result; - } -}; - -class TRBSplitLimiter { -private: - std::deque Slices; - std::shared_ptr Counters; - std::shared_ptr Batch; - TSplitSettings Settings; -public: - TRBSplitLimiter(std::shared_ptr counters, - ISchemaDetailInfo::TPtr schemaInfo, const std::shared_ptr batch, const TSplitSettings& settings); - - static std::vector BuildSimpleSlices(const std::shared_ptr& batch, const TSplitSettings& settings, - const std::shared_ptr& counters, const ISchemaDetailInfo::TPtr& schemaInfo); - - std::deque ExtractSlices() { - return std::move(Slices); - } - - bool Next(std::vector>>& portionBlobs, std::shared_ptr& batch); -}; - -} diff --git a/ydb/core/tx/columnshard/splitter/scheme_info.cpp b/ydb/core/tx/columnshard/splitter/scheme_info.cpp index 
912c1d54f2fb..fe4a65604e11 100644 --- a/ydb/core/tx/columnshard/splitter/scheme_info.cpp +++ b/ydb/core/tx/columnshard/splitter/scheme_info.cpp @@ -2,4 +2,12 @@ namespace NKikimr::NOlap { +NKikimr::NOlap::TColumnSaver ISchemaDetailInfo::GetColumnSaver(const ui32 columnId) const { + auto saver = DoGetColumnSaver(columnId); + if (OverrideSerializer) { + saver.ResetSerializer(*OverrideSerializer); + } + return saver; +} + } diff --git a/ydb/core/tx/columnshard/splitter/scheme_info.h b/ydb/core/tx/columnshard/splitter/scheme_info.h index 1e00f53343b6..1e72e63e9d35 100644 --- a/ydb/core/tx/columnshard/splitter/scheme_info.h +++ b/ydb/core/tx/columnshard/splitter/scheme_info.h @@ -8,11 +8,15 @@ namespace NKikimr::NOlap { class ISchemaDetailInfo { +private: + YDB_ACCESSOR_DEF(std::optional, OverrideSerializer); +protected: + virtual TColumnSaver DoGetColumnSaver(const ui32 columnId) const = 0; public: using TPtr = std::shared_ptr; virtual ~ISchemaDetailInfo() = default; virtual ui32 GetColumnId(const std::string& fieldName) const = 0; - virtual TColumnSaver GetColumnSaver(const ui32 columnId) const = 0; + TColumnSaver GetColumnSaver(const ui32 columnId) const; virtual std::shared_ptr GetField(const ui32 columnId) const = 0; virtual std::optional GetColumnSerializationStats(const ui32 columnId) const = 0; virtual bool NeedMinMaxForColumn(const ui32 columnId) const = 0; diff --git a/ydb/core/tx/columnshard/splitter/settings.cpp b/ydb/core/tx/columnshard/splitter/settings.cpp index 36ba5be4f7ed..65a6b822ba5f 100644 --- a/ydb/core/tx/columnshard/splitter/settings.cpp +++ b/ydb/core/tx/columnshard/splitter/settings.cpp @@ -1,5 +1,5 @@ #include "settings.h" -namespace NKikimr::NOlap { +namespace NKikimr::NOlap::NSplitter { } diff --git a/ydb/core/tx/columnshard/splitter/settings.h b/ydb/core/tx/columnshard/splitter/settings.h index 34bc29783799..146d1147aef2 100644 --- a/ydb/core/tx/columnshard/splitter/settings.h +++ b/ydb/core/tx/columnshard/splitter/settings.h @@ -2,9 +2,15 
@@ #include +#include + #include +#include +#include +#include +#include -namespace NKikimr::NOlap { +namespace NKikimr::NOlap::NSplitter { class TSplitSettings { private: @@ -29,4 +35,97 @@ class TSplitSettings { return MaxPortionSize; } }; + +class TGroupFeatures { +private: + YDB_READONLY_DEF(TString, Name); + YDB_READONLY_DEF(TSplitSettings, SplitSettings); + YDB_READONLY_DEF(std::set, EntityIds); +public: + TGroupFeatures(const TString& name, const TSplitSettings& settings, std::set&& entities) + : Name(name) + , SplitSettings(settings) + , EntityIds(std::move(entities)) { + AFL_VERIFY(!!Name); + } + + TGroupFeatures(const TString& name, const TSplitSettings& settings) + : Name(name) + , SplitSettings(settings) { + AFL_VERIFY(!!Name); + } + + void AddEntity(const ui32 entityId) { + AFL_VERIFY(EntityIds.emplace(entityId).second); + } + + bool IsEmpty() const { + return EntityIds.empty(); + } + + bool Contains(const ui32 entityId) const { + return EntityIds.empty() || EntityIds.contains(entityId); + } +}; + +class TEntityGroups { +private: + THashMap GroupEntities; + THashSet UsedEntityIds; + TGroupFeatures DefaultGroupFeatures; +public: + TEntityGroups(const TGroupFeatures& defaultGroup) + : DefaultGroupFeatures(defaultGroup) { + AFL_VERIFY(DefaultGroupFeatures.IsEmpty())("problem", "default group cannot be not empty"); + } + + TEntityGroups(const TSplitSettings& splitSettings, const TString& name) + : DefaultGroupFeatures(name, splitSettings) { + + } + + const TGroupFeatures& GetDefaultGroupFeatures() const { + return DefaultGroupFeatures; + } + + bool IsEmpty() const { + return GroupEntities.empty(); + } + + TGroupFeatures& RegisterGroup(const TString& groupName, const TSplitSettings& settings) { + auto it = GroupEntities.find(groupName); + AFL_VERIFY(it == GroupEntities.end()); + return GroupEntities.emplace(groupName, TGroupFeatures(groupName, settings)).first->second; + } + + TGroupFeatures& MutableGroupVerified(const TString& groupName) { + auto it = 
GroupEntities.find(groupName); + AFL_VERIFY(it != GroupEntities.end()); + return it->second; + } + + TGroupFeatures* GetGroupOptional(const TString& groupName) { + auto it = GroupEntities.find(groupName); + if (it != GroupEntities.end()) { + return &it->second; + } else { + return nullptr; + } + } + + void Add(TGroupFeatures&& features, const TString& groupName) { + for (auto&& i : features.GetEntityIds()) { + AFL_VERIFY(UsedEntityIds.emplace(i).second); + } + AFL_VERIFY(GroupEntities.emplace(groupName, std::move(features)).second); + } + + THashMap::const_iterator begin() const { + return GroupEntities.begin(); + } + + THashMap::const_iterator end() const { + return GroupEntities.end(); + } +}; } diff --git a/ydb/core/tx/columnshard/splitter/similar_packer.cpp b/ydb/core/tx/columnshard/splitter/similar_packer.cpp new file mode 100644 index 000000000000..9d22b3a6b255 --- /dev/null +++ b/ydb/core/tx/columnshard/splitter/similar_packer.cpp @@ -0,0 +1,5 @@ +#include "similar_packer.h" + +namespace NKikimr::NOlap { + +} diff --git a/ydb/core/tx/columnshard/splitter/similar_packer.h b/ydb/core/tx/columnshard/splitter/similar_packer.h new file mode 100644 index 000000000000..54abde2640fb --- /dev/null +++ b/ydb/core/tx/columnshard/splitter/similar_packer.h @@ -0,0 +1,81 @@ +#pragma once +#include + +#include + +namespace NKikimr::NOlap { + +template +class TArrayView { +private: + typename TContainer::iterator Begin; + typename TContainer::iterator End; +public: + TArrayView(typename TContainer::iterator itBegin, typename TContainer::iterator itEnd) + : Begin(itBegin) + , End(itEnd) { + + } + + typename TContainer::iterator begin() { + return Begin; + } + + typename TContainer::iterator end() { + return End; + } + + typename TContainer::value_type& front() { + return *Begin; + } + + typename TContainer::value_type& operator[](const size_t index) { + return *(Begin + index); + } + + size_t size() { + return End - Begin; + } +}; + +template +using TVectorView = 
TArrayView>; + +class TSimilarPacker { +private: + const ui64 BottomLimitNecessary = 0; +public: + TSimilarPacker(const ui64 bottomLimitNecessary) + : BottomLimitNecessary(bottomLimitNecessary) + { + + } + + template + std::vector> Split(std::vector& objects) { + ui64 fullSize = 0; + for (auto&& i : objects) { + fullSize += i.GetSize(); + } + if (fullSize <= BottomLimitNecessary) { + return {TVectorView(objects.begin(), objects.end())}; + } + ui64 currentSize = 0; + ui64 currentStart = 0; + std::vector> result; + for (ui32 i = 0; i < objects.size(); ++i) { + const ui64 nextSize = currentSize + objects[i].GetSize(); + const ui64 nextOtherSize = fullSize - nextSize; + if ((nextSize >= BottomLimitNecessary && nextOtherSize >= BottomLimitNecessary) || (i + 1 == objects.size())) { + result.emplace_back(TVectorView(objects.begin() + currentStart, objects.begin() + i + 1)); + currentSize = 0; + currentStart = i + 1; + } else { + currentSize = nextSize; + } + } + return result; + } +}; + +} diff --git a/ydb/core/tx/columnshard/splitter/simple.h b/ydb/core/tx/columnshard/splitter/simple.h index d83df17125a0..48c7b9efa009 100644 --- a/ydb/core/tx/columnshard/splitter/simple.h +++ b/ydb/core/tx/columnshard/splitter/simple.h @@ -172,6 +172,10 @@ class TSplittedColumnChunk: public IPortionColumnChunk { virtual std::shared_ptr DoGetLastScalar() const override { return Data.GetLastScalar(); } + virtual std::shared_ptr DoCopyWithAnotherBlob(TString&& /*data*/, const TSimpleColumnInfo& /*columnInfo*/) const override { + AFL_VERIFY(false); + return nullptr; + } public: i64 GetSize() const { diff --git a/ydb/core/tx/columnshard/splitter/ut/ut_splitter.cpp b/ydb/core/tx/columnshard/splitter/ut/ut_splitter.cpp index 72aa5539f6b2..b63524185294 100644 --- a/ydb/core/tx/columnshard/splitter/ut/ut_splitter.cpp +++ b/ydb/core/tx/columnshard/splitter/ut/ut_splitter.cpp @@ -1,9 +1,17 @@ -#include -#include +#include +#include +#include +#include #include +#include + #include #include #include 
+#include + +#include + #include Y_UNIT_TEST_SUITE(Splitter) { @@ -13,6 +21,11 @@ Y_UNIT_TEST_SUITE(Splitter) { class TTestSnapshotSchema: public NKikimr::NOlap::ISchemaDetailInfo { private: mutable std::map Decoder; + protected: + virtual NKikimr::NOlap::TColumnSaver DoGetColumnSaver(const ui32 columnId) const override { + return NKikimr::NOlap::TColumnSaver(nullptr, std::make_shared(arrow::ipc::IpcOptions::Defaults())); + } + public: virtual bool NeedMinMaxForColumn(const ui32 /*columnId*/) const override { return true; @@ -21,10 +34,6 @@ Y_UNIT_TEST_SUITE(Splitter) { return false; } - virtual NKikimr::NOlap::TColumnSaver GetColumnSaver(const ui32 columnId) const override { - return NKikimr::NOlap::TColumnSaver(nullptr, std::make_shared(arrow::ipc::IpcOptions::Defaults())); - } - virtual std::optional GetColumnSerializationStats(const ui32 /*columnId*/) const override { return {}; } @@ -66,79 +75,98 @@ Y_UNIT_TEST_SUITE(Splitter) { std::shared_ptr Schema = std::make_shared(); YDB_ACCESSOR_DEF(std::optional, ExpectSlicesCount); YDB_ACCESSOR_DEF(std::optional, ExpectBlobsCount); - YDB_ACCESSOR(bool, HasMultiSplit, false); - public: + YDB_ACCESSOR(std::optional, ExpectPortionsCount, 1); + YDB_ACCESSOR_DEF(std::optional, ExpectChunksCount); + YDB_ACCESSOR(std::optional, ExpectedInternalSplitsCount, 0); - void Execute(std::shared_ptr batch) { + public: + void Execute(std::shared_ptr batch, + const NKikimr::NOlap::NSplitter::TSplitSettings& settings = NKikimr::NOlap::NSplitter::TSplitSettings() + ) { + using namespace NKikimr::NOlap; NKikimr::NColumnShard::TIndexationCounters counters("test"); - NKikimr::NOlap::TRBSplitLimiter limiter(counters.SplitterCounters, Schema, batch, NKikimr::NOlap::TSplitSettings()); - std::vector>> chunksForBlob; - std::map>> restoredBatch; - std::vector blobsSize; - bool hasMultiSplit = false; + std::vector generalSlices; + { + auto slices = TBatchSerializedSlice::BuildSimpleSlices(batch, settings, counters.SplitterCounters, Schema); + for 
(auto&& i : slices) { + generalSlices.emplace_back(i); + } + } + + TSimilarPacker packer(settings.GetExpectedPortionSize()); + auto packs = packer.Split(generalSlices); + const NSplitter::TEntityGroups groups(settings, "default"); + const ui32 portionsCount = packs.size(); ui32 blobsCount = 0; - ui32 slicesCount = 0; - std::shared_ptr sliceBatch; - while (limiter.Next(chunksForBlob, sliceBatch)) { - ++slicesCount; - TStringBuilder sb; - std::map recordsCountByColumn; - for (auto&& chunks : chunksForBlob) { - ++blobsCount; - ui64 blobSize = 0; - sb << "["; - std::set blobColumnChunks; - for (auto&& iData : chunks) { - auto i = dynamic_pointer_cast(iData); - AFL_VERIFY(i); - const ui32 columnId = i->GetColumnId(); - recordsCountByColumn[columnId] += i->GetRecordsCountVerified(); - restoredBatch[Schema->GetColumnName(columnId)].emplace_back(*Schema->GetColumnLoader(columnId).Apply(i->GetData())); - blobSize += i->GetData().size(); - if (i->GetRecordsCount() != NKikimr::NOlap::TSplitSettings().GetMinRecordsCount() && !blobColumnChunks.emplace(columnId).second) { - hasMultiSplit = true; + ui32 chunksCount = 0; + ui32 pagesSum = 0; + ui32 portionShift = 0; + ui32 internalSplitsCount = 0; + for (auto&& i : packs) { + ui32 portionRecordsCount = 0; + for (auto&& rc : i) { + portionRecordsCount += rc.GetRecordsCount(); + } + const ui32 pagesOriginal = i.size(); + pagesSum += pagesOriginal; + TGeneralSerializedSlice slice(std::move(i)); + auto blobsLocal = slice.GroupChunksByBlobs(groups); + internalSplitsCount += slice.GetInternalSplitsCount(); + blobsCount += blobsLocal.size(); + THashMap>> entityChunks; + ui32 portionSize = 0; + for (auto&& b : blobsLocal) { + chunksCount += b.GetChunks().size(); + ui64 bSize = 0; + for (auto&& c : b.GetChunks()) { + bSize += c->GetData().size(); + AFL_VERIFY(c->GetEntityId()); + auto& v = entityChunks[c->GetEntityId()]; + if (v.size()) { + AFL_VERIFY(v.back()->GetChunkIdxVerified() + 1 == c->GetChunkIdxVerified()); } - sb << "(" << 
i->DebugString() << ")"; + entityChunks[c->GetEntityId()].emplace_back(c); } - blobsSize.emplace_back(blobSize); - sb << "];"; + portionSize += bSize; + AFL_VERIFY(bSize < (ui64)settings.GetMaxBlobSize()); + AFL_VERIFY(bSize * 1.01 > (ui64)settings.GetMinBlobSize() || (packs.size() == 1 && blobsLocal.size() == 1))("blob_size", bSize); } - std::optional columnRecordsCount; - for (auto&& i : recordsCountByColumn) { - if (!columnRecordsCount) { - columnRecordsCount = i.second; - } else { - Y_ABORT_UNLESS(i.second == *columnRecordsCount); + AFL_VERIFY(portionSize >= settings.GetExpectedPortionSize() || packs.size() == 1)("size", portionSize)("limit", settings.GetMaxPortionSize()); + + THashMap> entitiesByRecordsCount; + ui32 pagesRestore = 0; + for (auto&& e : entityChunks) { + const std::shared_ptr arr = batch->GetColumnByName(Schema->GetColumnName(e.first)); + AFL_VERIFY(arr); + ui32 count = 0; + for (auto&& c : e.second) { + auto slice = arr->Slice(count + portionShift, c->GetRecordsCountVerified()); + auto readBatch = *Schema->GetColumnLoader(e.first).Apply(c->GetData()); + AFL_VERIFY(slice->length() == readBatch->num_rows()); + Y_ABORT_UNLESS(readBatch->column(0)->RangeEquals(*slice, 0, readBatch->num_rows(), 0, arrow::EqualOptions::Defaults())); + count += c->GetRecordsCountVerified(); + AFL_VERIFY(entitiesByRecordsCount[count].emplace(e.first).second); + AFL_VERIFY(entitiesByRecordsCount[count].size() <= (ui32)batch->num_columns()); + if (entitiesByRecordsCount[count].size() == (ui32)batch->num_columns()) { + ++pagesRestore; + } } + AFL_VERIFY(count == portionRecordsCount); } - Cerr << sb << Endl; - } - if (ExpectBlobsCount) { - Y_ABORT_UNLESS(*ExpectBlobsCount == blobsCount); - } - if (ExpectSlicesCount) { - Y_ABORT_UNLESS(*ExpectSlicesCount == slicesCount); - } - Y_ABORT_UNLESS(hasMultiSplit == HasMultiSplit); - for (auto&& i : blobsSize) { - Y_ABORT_UNLESS(i < NKikimr::NOlap::TSplitSettings().GetMaxBlobSize()); - Y_ABORT_UNLESS(i + 10000 >= 
NKikimr::NOlap::TSplitSettings().GetMinBlobSize() || blobsSize.size() == 1); - } - Y_ABORT_UNLESS(restoredBatch.size() == (ui32)batch->num_columns()); - for (auto&& i : batch->schema()->fields()) { - auto it = restoredBatch.find(i->name()); - Y_ABORT_UNLESS(it != restoredBatch.end()); - auto column = batch->GetColumnByName(i->name()); - Y_ABORT_UNLESS(column); - ui64 recordsCount = 0; - for (auto&& c : it->second) { - Y_ABORT_UNLESS(c->num_columns() == 1); - Y_ABORT_UNLESS(c->column(0)->RangeEquals(column, 0, c->num_rows(), recordsCount, arrow::EqualOptions::Defaults())); - recordsCount += c->num_rows(); + AFL_VERIFY(entitiesByRecordsCount.size() >= i.size()); + AFL_VERIFY(pagesRestore == pagesOriginal || batch->num_columns() == 1)("restore", pagesRestore)("original", pagesOriginal); + for (auto&& c : entityChunks.begin()->second) { + portionShift += c->GetRecordsCountVerified(); } - Y_ABORT_UNLESS(recordsCount == (ui32)batch->num_rows()); - } + AFL_VERIFY(portionShift == (ui32)batch->num_rows()); /* compare, do not assign: the shifts accumulated over all portions must cover the whole batch */ + AFL_VERIFY(pagesSum == generalSlices.size())("sum", pagesSum)("general_slices", generalSlices.size()); + AFL_VERIFY(internalSplitsCount == ExpectedInternalSplitsCount.value_or(internalSplitsCount))("expected", *ExpectedInternalSplitsCount)("real", internalSplitsCount); + AFL_VERIFY(blobsCount == ExpectBlobsCount.value_or(blobsCount))("blobs_count", blobsCount)("expected", *ExpectBlobsCount); + AFL_VERIFY(pagesSum == ExpectSlicesCount.value_or(pagesSum))("sum", pagesSum)("expected", *ExpectSlicesCount); + AFL_VERIFY(portionsCount == ExpectPortionsCount.value_or(portionsCount))("portions_count", portionsCount)("expected", *ExpectPortionsCount); + AFL_VERIFY(chunksCount == ExpectChunksCount.value_or(chunksCount))("chunks_count", chunksCount)("expected", *ExpectChunksCount); + } }; @@ -157,7 +185,7 @@ Y_UNIT_TEST_SUITE(Splitter) { std::shared_ptr batch = NKikimr::NArrow::NConstruction::TRecordBatchConstructor({column}).BuildBatch(80048); NKikimr::NColumnShard::TIndexationCounters
counters("test"); - TSplitTester().SetExpectBlobsCount(1).SetExpectSlicesCount(1).SetHasMultiSplit(true).Execute(batch); + TSplitTester().SetExpectBlobsCount(1).SetExpectSlicesCount(8).Execute(batch); } Y_UNIT_TEST(Minimal) { @@ -188,15 +216,36 @@ Y_UNIT_TEST_SUITE(Splitter) { TSplitTester().SetExpectBlobsCount(8).SetExpectSlicesCount(8).Execute(batch); } + Y_UNIT_TEST(CritSmallPortions) { + NConstruction::IArrayBuilder::TPtr columnBig = std::make_shared>( + "field1", NKikimr::NArrow::NConstruction::TStringPoolFiller(8, 7120)); + NConstruction::IArrayBuilder::TPtr columnSmall = std::make_shared>( + "field2", NKikimr::NArrow::NConstruction::TStringPoolFiller(8, 128)); + std::shared_ptr batch = NKikimr::NArrow::NConstruction::TRecordBatchConstructor({columnBig, columnSmall}).BuildBatch(80048); + NKikimr::NColumnShard::TIndexationCounters counters("test"); + + TSplitTester().SetExpectBlobsCount(80).SetExpectSlicesCount(80).SetExpectedInternalSplitsCount(0).SetExpectPortionsCount(40) + .Execute(batch, NKikimr::NOlap::NSplitter::TSplitSettings().SetMinRecordsCount(1000).SetMaxPortionSize(8000000)); + } + Y_UNIT_TEST(Crit) { NConstruction::IArrayBuilder::TPtr columnBig = std::make_shared>( - "field1", NKikimr::NArrow::NConstruction::TStringPoolFiller(8, 712)); + "field1", NKikimr::NArrow::NConstruction::TStringPoolFiller(8, 7120)); NConstruction::IArrayBuilder::TPtr columnSmall = std::make_shared>( "field2", NKikimr::NArrow::NConstruction::TStringPoolFiller(8, 128)); std::shared_ptr batch = NKikimr::NArrow::NConstruction::TRecordBatchConstructor({columnBig, columnSmall}).BuildBatch(80048); NKikimr::NColumnShard::TIndexationCounters counters("test"); - TSplitTester().SetExpectBlobsCount(16).SetExpectSlicesCount(8).Execute(batch); + TSplitTester().SetExpectBlobsCount(80).SetExpectSlicesCount(8).SetExpectedInternalSplitsCount(8).SetExpectPortionsCount(8).Execute(batch); + } + + Y_UNIT_TEST(CritSimple) { + NConstruction::IArrayBuilder::TPtr columnBig = std::make_shared>( + 
"field1", NKikimr::NArrow::NConstruction::TStringPoolFiller(8, 7120)); + std::shared_ptr batch = NKikimr::NArrow::NConstruction::TRecordBatchConstructor({columnBig}).BuildBatch(80048); + NKikimr::NColumnShard::TIndexationCounters counters("test"); + + TSplitTester().SetExpectBlobsCount(72).SetExpectSlicesCount(8).SetExpectedInternalSplitsCount(0).SetExpectPortionsCount(8).Execute(batch); } }; diff --git a/ydb/core/tx/columnshard/splitter/ut/ya.make b/ydb/core/tx/columnshard/splitter/ut/ya.make index 4d9e9af04665..24d266bffa8e 100644 --- a/ydb/core/tx/columnshard/splitter/ut/ya.make +++ b/ydb/core/tx/columnshard/splitter/ut/ya.make @@ -8,12 +8,16 @@ PEERDIR( ydb/core/tx/columnshard/counters ydb/core/tx/columnshard/engines/portions + ydb/core/tx/columnshard/common + ydb/core/tx/columnshard/blobs_action + ydb/core/tx/columnshard/data_sharing ydb/core/kqp/common ydb/library/yql/parser/pg_wrapper ydb/library/yql/public/udf ydb/core/persqueue ydb/core/kqp/session_actor ydb/core/tx/tx_proxy + ydb/core/tx/columnshard/engines/storage/chunks ydb/core/tx ydb/core/mind ydb/library/yql/minikql/comp_nodes/llvm14 diff --git a/ydb/core/tx/columnshard/splitter/ya.make b/ydb/core/tx/columnshard/splitter/ya.make index 9f2f0719a9a2..5f6c60cdf1ff 100644 --- a/ydb/core/tx/columnshard/splitter/ya.make +++ b/ydb/core/tx/columnshard/splitter/ya.make @@ -4,7 +4,7 @@ SRCS( batch_slice.cpp chunks.cpp simple.cpp - rb_splitter.cpp + similar_packer.cpp stats.cpp column_info.cpp settings.cpp @@ -15,7 +15,8 @@ SRCS( PEERDIR( contrib/libs/apache/arrow - ydb/core/tx/columnshard/engines/storage + ydb/core/tx/columnshard/splitter/abstract + ydb/core/tx/columnshard/engines/scheme ) END() diff --git a/ydb/core/tx/columnshard/tables_manager.cpp b/ydb/core/tx/columnshard/tables_manager.cpp index 4f628419226f..e7d3ca6ba0c8 100644 --- a/ydb/core/tx/columnshard/tables_manager.cpp +++ b/ydb/core/tx/columnshard/tables_manager.cpp @@ -37,13 +37,18 @@ bool 
TTablesManager::FillMonitoringReport(NTabletFlatExecutor::TTransactionConte } } json.InsertValue("tables_count", Tables.size()); - json.InsertValue("presets_count", SchemaPresets.size()); + json.InsertValue("presets_count", SchemaPresetsIds.size()); json.InsertValue("to_drop_count", PathsToDrop.size()); return true; } bool TTablesManager::InitFromDB(NIceDb::TNiceDb& db) { + using TTableVersionsInfo = TVersionedSchema; + + THashMap schemaPresets; + THashMap tableVersions; { + TMemoryProfileGuard g("TTablesManager/InitFromDB::Tables"); auto rowset = db.Table().Select(); if (!rowset.IsReady()) { return false; @@ -57,7 +62,9 @@ bool TTablesManager::InitFromDB(NIceDb::TNiceDb& db) { if (table.IsDropped()) { PathsToDrop.insert(table.GetPathId()); } - Tables.insert_or_assign(table.GetPathId(), std::move(table)); + + AFL_VERIFY(tableVersions.emplace(table.GetPathId(), TTableVersionsInfo()).second); + AFL_VERIFY(Tables.emplace(table.GetPathId(), std::move(table)).second); if (!rowset.Next()) { return false; @@ -67,6 +74,7 @@ bool TTablesManager::InitFromDB(NIceDb::TNiceDb& db) { bool isFakePresetOnly = true; { + TMemoryProfileGuard g("TTablesManager/InitFromDB::SchemaPresets"); auto rowset = db.Table().Select(); if (!rowset.IsReady()) { return false; @@ -82,7 +90,8 @@ bool TTablesManager::InitFromDB(NIceDb::TNiceDb& db) { Y_VERIFY_S(preset.GetName() == "default", "Preset name: " + preset.GetName()); isFakePresetOnly = false; } - SchemaPresets.insert_or_assign(preset.GetId(), preset); + AFL_VERIFY(schemaPresets.emplace(preset.GetId(), preset).second); + AFL_VERIFY(SchemaPresetsIds.emplace(preset.GetId()).second); if (!rowset.Next()) { return false; } @@ -90,6 +99,7 @@ bool TTablesManager::InitFromDB(NIceDb::TNiceDb& db) { } { + TMemoryProfileGuard g("TTablesManager/InitFromDB::Versions"); auto rowset = db.Table().Select(); if (!rowset.IsReady()) { return false; @@ -101,13 +111,14 @@ bool TTablesManager::InitFromDB(NIceDb::TNiceDb& db) { 
Y_ABORT_UNLESS(Tables.contains(pathId)); NOlap::TSnapshot version( rowset.GetValue(), - rowset.GetValue()); + rowset.GetValue()); - auto& table = Tables.at(pathId); - TTableInfo::TTableVersionInfo versionInfo; + auto& table = Tables[pathId]; + auto& versionsInfo = tableVersions[pathId]; + NKikimrTxColumnShard::TTableVersionInfo versionInfo; Y_ABORT_UNLESS(versionInfo.ParseFromString(rowset.GetValue())); AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("event", "load_table_version")("path_id", pathId)("snapshot", version)("version", versionInfo.HasSchema() ? versionInfo.GetSchema().GetVersion() : -1); - Y_ABORT_UNLESS(SchemaPresets.contains(versionInfo.GetSchemaPresetId())); + Y_ABORT_UNLESS(schemaPresets.contains(versionInfo.GetSchemaPresetId())); if (!table.IsDropped()) { auto& ttlSettings = versionInfo.GetTtlSettings(); @@ -120,7 +131,8 @@ bool TTablesManager::InitFromDB(NIceDb::TNiceDb& db) { } } } - table.AddVersion(version, versionInfo); + table.AddVersion(version); + versionsInfo.AddVersion(version, versionInfo); if (!rowset.Next()) { return false; } @@ -128,6 +140,7 @@ bool TTablesManager::InitFromDB(NIceDb::TNiceDb& db) { } { + TMemoryProfileGuard g("TTablesManager/InitFromDB::PresetVersions"); auto rowset = db.Table().Select(); if (!rowset.IsReady()) { return false; @@ -135,8 +148,8 @@ bool TTablesManager::InitFromDB(NIceDb::TNiceDb& db) { while (!rowset.EndOfSet()) { const ui32 id = rowset.GetValue(); - Y_ABORT_UNLESS(SchemaPresets.contains(id)); - auto& preset = SchemaPresets.at(id); + Y_ABORT_UNLESS(schemaPresets.contains(id)); + auto& preset = schemaPresets[id]; NOlap::TSnapshot version( rowset.GetValue(), rowset.GetValue()); @@ -151,19 +164,27 @@ bool TTablesManager::InitFromDB(NIceDb::TNiceDb& db) { } } - for (const auto& [id, preset] : SchemaPresets) { + TMemoryProfileGuard g("TTablesManager/InitFromDB::Other"); + for (const auto& [id, preset] : schemaPresets) { if (isFakePresetOnly) { Y_ABORT_UNLESS(id == 0); } else { Y_ABORT_UNLESS(id > 0); } - for 
(const auto& [version, schemaInfo] : preset.GetVersions()) { + for (const auto& [version, schemaInfo] : preset.GetVersionsById()) { if (schemaInfo.HasSchema()) { AFL_INFO(NKikimrServices::TX_COLUMNSHARD)("event", "index_schema")("preset_id", id)("snapshot", version)("version", schemaInfo.GetSchema().GetVersion()); - IndexSchemaVersion(version, schemaInfo.GetSchema()); + if (!PrimaryIndex) { + PrimaryIndex = std::make_unique(TabletId, StoragesManager, preset.GetMinVersionForId(schemaInfo.GetSchema().GetVersion()), schemaInfo.GetSchema()); + } else { + PrimaryIndex->RegisterSchemaVersion(preset.GetMinVersionForId(schemaInfo.GetSchema().GetVersion()), schemaInfo.GetSchema()); + } } } } + for (auto&& i : Tables) { + PrimaryIndex->RegisterTable(i.first); + } return true; } @@ -176,11 +197,14 @@ bool TTablesManager::LoadIndex(NOlap::TDbWrapper& idxDB) { return true; } -bool TTablesManager::HasTable(const ui64 pathId) const { +bool TTablesManager::HasTable(const ui64 pathId, bool withDeleted) const { auto it = Tables.find(pathId); - if (it == Tables.end() || it->second.IsDropped()) { + if (it == Tables.end()) { return false; } + if (it->second.IsDropped()) { + return withDeleted; + } return true; } @@ -189,7 +213,7 @@ bool TTablesManager::IsReadyForWrite(const ui64 pathId) const { } bool TTablesManager::HasPreset(const ui32 presetId) const { - return SchemaPresets.contains(presetId); + return SchemaPresetsIds.contains(presetId); } const TTableInfo& TTablesManager::GetTable(const ui64 pathId) const { @@ -201,8 +225,7 @@ ui64 TTablesManager::GetMemoryUsage() const { ui64 memory = Tables.size() * sizeof(TTableInfo) + PathsToDrop.size() * sizeof(ui64) + - Ttl.PathsCount() * sizeof(TTtl::TDescription) + - SchemaPresets.size() * sizeof(TSchemaPreset); + Ttl.PathsCount() * sizeof(TTtl::TDescription); if (PrimaryIndex) { memory += PrimaryIndex->MemoryUsage(); } @@ -210,20 +233,17 @@ ui64 TTablesManager::GetMemoryUsage() const { } void TTablesManager::DropTable(const ui64 pathId, 
const NOlap::TSnapshot& version, NIceDb::TNiceDb& db) { - auto& table = Tables.at(pathId); + AFL_VERIFY(Tables.contains(pathId)); + auto& table = Tables[pathId]; table.SetDropVersion(version); PathsToDrop.insert(pathId); Ttl.DropPathTtl(pathId); - if (PrimaryIndex) { - PrimaryIndex->OnTieringModified(nullptr, Ttl); - } Schema::SaveTableDropVersion(db, pathId, version.GetPlanStep(), version.GetTxId()); } void TTablesManager::DropPreset(const ui32 presetId, const NOlap::TSnapshot& version, NIceDb::TNiceDb& db) { - auto& preset = SchemaPresets.at(presetId); - Y_ABORT_UNLESS(preset.GetName() != "default", "Cannot drop the default preset"); - preset.SetDropVersion(version); + AFL_VERIFY(SchemaPresetsIds.contains(presetId)); + SchemaPresetsIds.erase(presetId); Schema::SaveSchemaPresetDropVersion(db, presetId, version); } @@ -233,24 +253,23 @@ void TTablesManager::RegisterTable(TTableInfo&& table, NIceDb::TNiceDb& db) { Schema::SaveTableInfo(db, table.GetPathId(), table.GetTieringUsage()); const ui64 pathId = table.GetPathId(); - Tables.insert_or_assign(pathId, std::move(table)); + AFL_VERIFY(Tables.emplace(pathId, std::move(table)).second); if (PrimaryIndex) { PrimaryIndex->RegisterTable(pathId); } } bool TTablesManager::RegisterSchemaPreset(const TSchemaPreset& schemaPreset, NIceDb::TNiceDb& db) { - if (SchemaPresets.contains(schemaPreset.GetId())) { + if (SchemaPresetsIds.contains(schemaPreset.GetId())) { return false; } + SchemaPresetsIds.emplace(schemaPreset.GetId()); Schema::SaveSchemaPresetInfo(db, schemaPreset.GetId(), schemaPreset.GetName()); - SchemaPresets.insert_or_assign(schemaPreset.GetId(), schemaPreset); return true; } void TTablesManager::AddSchemaVersion(const ui32 presetId, const NOlap::TSnapshot& version, const NKikimrSchemeOp::TColumnTableSchema& schema, NIceDb::TNiceDb& db) { - Y_ABORT_UNLESS(SchemaPresets.contains(presetId)); - auto preset = SchemaPresets.at(presetId); + Y_ABORT_UNLESS(SchemaPresetsIds.contains(presetId)); 
TSchemaPreset::TSchemaPresetVersionInfo versionInfo; versionInfo.SetId(presetId); @@ -258,32 +277,37 @@ void TTablesManager::AddSchemaVersion(const ui32 presetId, const NOlap::TSnapsho versionInfo.SetSinceTxId(version.GetTxId()); *versionInfo.MutableSchema() = schema; - auto& schemaPreset = SchemaPresets.at(presetId); Schema::SaveSchemaPresetVersionInfo(db, presetId, version, versionInfo); - schemaPreset.AddVersion(version, versionInfo); - if (versionInfo.HasSchema()){ - IndexSchemaVersion(version, versionInfo.GetSchema()); + if (versionInfo.HasSchema()) { + if (!PrimaryIndex) { + PrimaryIndex = std::make_unique(TabletId, StoragesManager, version, schema); + for (auto&& i : Tables) { + PrimaryIndex->RegisterTable(i.first); + } + } else { + PrimaryIndex->RegisterSchemaVersion(version, schema); + } for (auto& columnName : Ttl.TtlColumns()) { PrimaryIndex->GetVersionedIndex().GetLastSchema()->GetIndexInfo().CheckTtlColumn(columnName); } } } -void TTablesManager::AddTableVersion(const ui64 pathId, const NOlap::TSnapshot& version, const TTableInfo::TTableVersionInfo& versionInfo, NIceDb::TNiceDb& db) { +void TTablesManager::AddTableVersion(const ui64 pathId, const NOlap::TSnapshot& version, const NKikimrTxColumnShard::TTableVersionInfo& versionInfo, NIceDb::TNiceDb& db, std::shared_ptr& manager) { auto it = Tables.find(pathId); AFL_VERIFY(it != Tables.end()); auto& table = it->second; if (versionInfo.HasSchemaPresetId()) { - Y_ABORT_UNLESS(SchemaPresets.contains(versionInfo.GetSchemaPresetId())); + Y_ABORT_UNLESS(SchemaPresetsIds.contains(versionInfo.GetSchemaPresetId())); } else if (versionInfo.HasSchema()) { TSchemaPreset fakePreset; - if (SchemaPresets.empty()) { + if (SchemaPresetsIds.empty()) { TSchemaPreset fakePreset; Y_ABORT_UNLESS(RegisterSchemaPreset(fakePreset, db)); AddSchemaVersion(fakePreset.GetId(), version, versionInfo.GetSchema(), db); } else { - Y_ABORT_UNLESS(SchemaPresets.contains(fakePreset.GetId())); + 
Y_ABORT_UNLESS(SchemaPresetsIds.contains(fakePreset.GetId())); AddSchemaVersion(fakePreset.GetId(), version, versionInfo.GetSchema(), db); } } @@ -295,34 +319,12 @@ void TTablesManager::AddTableVersion(const ui64 pathId, const NOlap::TSnapshot& } else { Ttl.DropPathTtl(pathId); } - if (PrimaryIndex) { - PrimaryIndex->OnTieringModified(nullptr, Ttl); + if (PrimaryIndex && manager->IsReady()) { + PrimaryIndex->OnTieringModified(manager, Ttl, pathId); } } Schema::SaveTableVersionInfo(db, pathId, version, versionInfo); - table.AddVersion(version, versionInfo); -} - -void TTablesManager::IndexSchemaVersion(const NOlap::TSnapshot& snapshot, const NKikimrSchemeOp::TColumnTableSchema& schema) { - NOlap::TIndexInfo indexInfo = DeserializeIndexInfoFromProto(schema); - indexInfo.SetAllKeys(); - const bool isFirstPrimaryIndexInitialization = !PrimaryIndex; - if (!PrimaryIndex) { - PrimaryIndex = std::make_unique(TabletId, NOlap::TCompactionLimits(), StoragesManager); - } - PrimaryIndex->RegisterSchemaVersion(snapshot, std::move(indexInfo)); - if (isFirstPrimaryIndexInitialization) { - for (auto&& i : Tables) { - PrimaryIndex->RegisterTable(i.first); - } - } - PrimaryIndex->OnTieringModified(nullptr, Ttl); -} - -NOlap::TIndexInfo TTablesManager::DeserializeIndexInfoFromProto(const NKikimrSchemeOp::TColumnTableSchema& schema) { - std::optional indexInfo = NOlap::TIndexInfo::BuildFromProto(schema); - Y_ABORT_UNLESS(indexInfo); - return *indexInfo; + table.AddVersion(version); } TTablesManager::TTablesManager(const std::shared_ptr& storagesManager, const ui64 tabletId) @@ -331,23 +333,29 @@ TTablesManager::TTablesManager(const std::shared_ptr& s { } -bool TTablesManager::TryFinalizeDropPath(NTabletFlatExecutor::TTransactionContext& txc, const ui64 pathId) { +bool TTablesManager::TryFinalizeDropPathOnExecute(NTable::TDatabase& dbTable, const ui64 pathId) const { auto itDrop = PathsToDrop.find(pathId); - if (itDrop == PathsToDrop.end()) { - return false; - } - if 
(GetPrimaryIndexSafe().HasDataInPathId(pathId)) { - return false; - } - PathsToDrop.erase(itDrop); - NIceDb::TNiceDb db(txc.DB); + AFL_VERIFY(itDrop != PathsToDrop.end()); + AFL_VERIFY(!GetPrimaryIndexSafe().HasDataInPathId(pathId)); + NIceDb::TNiceDb db(dbTable); NColumnShard::Schema::EraseTableInfo(db, pathId); - const auto& table = Tables.find(pathId); - Y_ABORT_UNLESS(table != Tables.end(), "No schema for path %lu", pathId); - for (auto&& tableVersion : table->second.GetVersions()) { - NColumnShard::Schema::EraseTableVersionInfo(db, pathId, tableVersion.first); + const auto& itTable = Tables.find(pathId); + AFL_VERIFY(itTable != Tables.end())("problem", "No schema for path")("path_id", pathId); + for (auto&& tableVersion : itTable->second.GetVersions()) { + NColumnShard::Schema::EraseTableVersionInfo(db, pathId, tableVersion); } return true; } +bool TTablesManager::TryFinalizeDropPathOnComplete(const ui64 pathId) { + auto itDrop = PathsToDrop.find(pathId); + AFL_VERIFY(itDrop != PathsToDrop.end()); + AFL_VERIFY(!GetPrimaryIndexSafe().HasDataInPathId(pathId)); + PathsToDrop.erase(itDrop); + const auto& itTable = Tables.find(pathId); + AFL_VERIFY(itTable != Tables.end())("problem", "No schema for path")("path_id", pathId); + Tables.erase(itTable); + return true; +} + } diff --git a/ydb/core/tx/columnshard/tables_manager.h b/ydb/core/tx/columnshard/tables_manager.h index 725bf197c284..60e1a5f66dbd 100644 --- a/ydb/core/tx/columnshard/tables_manager.h +++ b/ydb/core/tx/columnshard/tables_manager.h @@ -13,43 +13,40 @@ namespace NKikimr::NColumnShard { -template +template class TVersionedSchema { -protected: - std::optional DropVersion; - TMap Versions; - +private: + TMap Versions; + TMap VersionsById; + TMap MinVersionById; public: - bool IsDropped() const { - return DropVersion.has_value(); - } - bool IsEmpty() const { - return Versions.empty(); + return VersionsById.empty(); } - void SetDropVersion(const NOlap::TSnapshot& version) { - DropVersion = version; + 
const TMap& GetVersionsById() const { + return VersionsById; } - const TMap& GetVersions() const { - return Versions; + NOlap::TSnapshot GetMinVersionForId(const ui64 sVersion) const { + auto it = MinVersionById.find(sVersion); + Y_ABORT_UNLESS(it != MinVersionById.end()); + return it->second; } - const TSchemaProto& GetVersion(const NOlap::TSnapshot& version) const { - const TSchemaProto* result = nullptr; - for (auto ver : Versions) { - if (ver.first > version) { - break; - } - result = &ver.second; + void AddVersion(const NOlap::TSnapshot& snapshot, const TVersionData& versionInfo) { + ui64 ssVersion = 0; + if (versionInfo.HasSchema()) { + ssVersion = versionInfo.GetSchema().GetVersion(); } - Y_ABORT_UNLESS(!!result); - return *result; - } + VersionsById.emplace(ssVersion, versionInfo); + Y_ABORT_UNLESS(Versions.emplace(snapshot, ssVersion).second); - void AddVersion(const NOlap::TSnapshot& version, const TSchemaProto& versionInfo) { - Versions[version] = versionInfo; + if (MinVersionById.contains(ssVersion)) { + MinVersionById.emplace(ssVersion, std::min(snapshot, MinVersionById.at(ssVersion))); + } else { + MinVersionById.emplace(ssVersion, snapshot); + } } }; @@ -80,22 +77,16 @@ class TSchemaPreset : public TVersionedSchema(); } Y_ABORT_UNLESS(!Id || Name == "default", "Unsupported preset at load time"); - - if (rowset.template HaveValue() && - rowset.template HaveValue()) - { - DropVersion.emplace(rowset.template GetValue(), - rowset.template GetValue()); - } return true; } }; -class TTableInfo : public TVersionedSchema { +class TTableInfo { public: - using TTableVersionInfo = NKikimrTxColumnShard::TTableVersionInfo; ui64 PathId; TString TieringUsage; + std::optional DropVersion; + YDB_READONLY_DEF(TSet, Versions); public: const TString& GetTieringUsage() const { @@ -107,10 +98,26 @@ class TTableInfo : public TVersionedSchema Tables; - THashMap SchemaPresets; + THashSet SchemaPresetsIds; THashSet PathsToDrop; TTtl Ttl; std::unique_ptr PrimaryIndex; @@ 
-140,14 +147,15 @@ class TTablesManager { public: TTablesManager(const std::shared_ptr& storagesManager, const ui64 tabletId); - bool TryFinalizeDropPath(NTabletFlatExecutor::TTransactionContext& txc, const ui64 pathId); + bool TryFinalizeDropPathOnExecute(NTable::TDatabase& dbTable, const ui64 pathId) const; + bool TryFinalizeDropPathOnComplete(const ui64 pathId); const TTtl& GetTtl() const { return Ttl; } - void AddTtls(THashMap& eviction) { - Ttl.AddTtls(eviction); + bool AddTtls(THashMap& eviction) { + return Ttl.AddTtls(eviction); } const THashSet& GetPathsToDrop() const { @@ -162,8 +170,8 @@ class TTablesManager { return Tables; } - const THashMap& GetSchemaPresets() const { - return SchemaPresets; + const THashSet& GetSchemaPresets() const { + return SchemaPresetsIds; } bool HasPrimaryIndex() const { @@ -189,13 +197,39 @@ class TTablesManager { return *PrimaryIndex; } + template + TIndex& MutablePrimaryIndexAsVerified() { + AFL_VERIFY(!!PrimaryIndex); + auto result = dynamic_cast(PrimaryIndex.get()); + AFL_VERIFY(result); + return *result; + } + + template + const TIndex& GetPrimaryIndexAsVerified() const { + AFL_VERIFY(!!PrimaryIndex); + auto result = dynamic_cast(PrimaryIndex.get()); + AFL_VERIFY(result); + return *result; + } + + template + const TIndex* GetPrimaryIndexAsOptional() const { + if (!PrimaryIndex) { + return nullptr; + } + auto result = dynamic_cast(PrimaryIndex.get()); + AFL_VERIFY(result); + return result; + } + bool InitFromDB(NIceDb::TNiceDb& db); bool LoadIndex(NOlap::TDbWrapper& db); const TTableInfo& GetTable(const ui64 pathId) const; ui64 GetMemoryUsage() const; - bool HasTable(const ui64 pathId) const; + bool HasTable(const ui64 pathId, bool withDeleted = false) const; bool IsReadyForWrite(const ui64 pathId) const; bool HasPreset(const ui32 presetId) const; @@ -206,11 +240,8 @@ class TTablesManager { bool RegisterSchemaPreset(const TSchemaPreset& schemaPreset, NIceDb::TNiceDb& db); void AddSchemaVersion(const ui32 presetId, const 
NOlap::TSnapshot& version, const NKikimrSchemeOp::TColumnTableSchema& schema, NIceDb::TNiceDb& db); - void AddTableVersion(const ui64 pathId, const NOlap::TSnapshot& version, const TTableInfo::TTableVersionInfo& versionInfo, NIceDb::TNiceDb& db); + void AddTableVersion(const ui64 pathId, const NOlap::TSnapshot& version, const NKikimrTxColumnShard::TTableVersionInfo& versionInfo, NIceDb::TNiceDb& db, std::shared_ptr& manager); bool FillMonitoringReport(NTabletFlatExecutor::TTransactionContext& txc, NJson::TJsonValue& json); -private: - void IndexSchemaVersion(const NOlap::TSnapshot& version, const NKikimrSchemeOp::TColumnTableSchema& schema); - static NOlap::TIndexInfo DeserializeIndexInfoFromProto(const NKikimrSchemeOp::TColumnTableSchema& schema); }; } diff --git a/ydb/core/tx/columnshard/test_helper/controllers.cpp b/ydb/core/tx/columnshard/test_helper/controllers.cpp new file mode 100644 index 000000000000..d9ee86446c61 --- /dev/null +++ b/ydb/core/tx/columnshard/test_helper/controllers.cpp @@ -0,0 +1,22 @@ +#include "controllers.h" +#include +#include +#include + +namespace NKikimr::NOlap { + +void TWaitCompactionController::OnTieringModified(const std::shared_ptr& /*tiers*/) { + ++TiersModificationsCount; + AFL_INFO(NKikimrServices::TX_COLUMNSHARD)("event", "OnTieringModified")("count", TiersModificationsCount); +} + +void TWaitCompactionController::SetTiersSnapshot(TTestBasicRuntime& runtime, const TActorId& tabletActorId, const NMetadata::NFetcher::ISnapshot::TPtr& snapshot) { + CurrentConfig = snapshot; + ui32 startCount = TiersModificationsCount; + NTxUT::ProvideTieringSnapshot(runtime, tabletActorId, snapshot); + while (TiersModificationsCount == startCount) { + runtime.SimulateSleep(TDuration::Seconds(1)); + } +} + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/test_helper/controllers.h b/ydb/core/tx/columnshard/test_helper/controllers.h new file mode 100644 index 000000000000..682f2dcacaf8 --- /dev/null +++ 
b/ydb/core/tx/columnshard/test_helper/controllers.h @@ -0,0 +1,57 @@ +#pragma once +#include +#include + +namespace NKikimr::NOlap { + +class TWaitCompactionController: public NYDBTest::NColumnShard::TController { +private: + using TBase = NKikimr::NYDBTest::ICSController; + TAtomicCounter ExportsFinishedCount = 0; + NMetadata::NFetcher::ISnapshot::TPtr CurrentConfig; + ui32 TiersModificationsCount = 0; + YDB_READONLY(TAtomicCounter, StatisticsUsageCount, 0); + YDB_READONLY(TAtomicCounter, MaxValueUsageCount, 0); +protected: + virtual void OnTieringModified(const std::shared_ptr& /*tiers*/) override; + virtual void OnExportFinished() override { + ExportsFinishedCount.Inc(); + } + virtual bool NeedForceCompactionBacketsConstruction() const override { + return true; + } + virtual ui64 GetSmallPortionSizeDetector(const ui64 /*def*/) const override { + return 0; + } + virtual TDuration GetOptimizerFreshnessCheckDuration(const TDuration /*defaultValue*/) const override { + return TDuration::Zero(); + } + virtual TDuration GetLagForCompactionBeforeTierings(const TDuration /*def*/) const override { + return TDuration::Zero(); + } + virtual TDuration GetTTLDefaultWaitingDuration(const TDuration /*defaultValue*/) const override { + return TDuration::Seconds(1); + } +public: + ui32 GetFinishedExportsCount() const { + return ExportsFinishedCount.Val(); + } + + virtual void OnStatisticsUsage(const NKikimr::NOlap::NStatistics::TOperatorContainer& /*statOperator*/) override { + StatisticsUsageCount.Inc(); + } + virtual void OnMaxValueUsage() override { + MaxValueUsageCount.Inc(); + } + void SetTiersSnapshot(TTestBasicRuntime& runtime, const TActorId& tabletActorId, const NMetadata::NFetcher::ISnapshot::TPtr& snapshot); + + virtual NMetadata::NFetcher::ISnapshot::TPtr GetFallbackTiersSnapshot() const override { + if (CurrentConfig) { + return CurrentConfig; + } else { + return TBase::GetFallbackTiersSnapshot(); + } + } +}; + +} diff --git 
a/ydb/core/tx/columnshard/test_helper/helper.cpp b/ydb/core/tx/columnshard/test_helper/helper.cpp new file mode 100644 index 000000000000..fc8fea56e84c --- /dev/null +++ b/ydb/core/tx/columnshard/test_helper/helper.cpp @@ -0,0 +1,91 @@ +#include "helper.h" +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace NKikimr::NArrow::NTest { + +NKikimrSchemeOp::TOlapColumnDescription TTestColumn::CreateColumn(const ui32 id) const { + NKikimrSchemeOp::TOlapColumnDescription col; + col.SetId(id); + col.SetName(Name); + if (StorageId) { + col.SetStorageId(StorageId); + } + auto columnType = NScheme::ProtoColumnTypeFromTypeInfoMod(Type, ""); + col.SetTypeId(columnType.TypeId); + if (columnType.TypeInfo) { + *col.MutableTypeInfo() = *columnType.TypeInfo; + } + return col; +} + +std::vector> TTestColumn::ConvertToPairs(const std::vector& columns) { + std::vector> result; + for (auto&& i : columns) { + result.emplace_back(std::make_pair(i.GetName(), i.GetType())); + } + return result; +} + +std::vector TTestColumn::BuildFromPairs(const std::vector>& columns) { + std::vector result; + for (auto&& i : columns) { + result.emplace_back(i.first, i.second); + } + return result; +} + +THashMap TTestColumn::ConvertToHash(const std::vector& columns) { + THashMap result; + for (auto&& i : columns) { + result.emplace(i.GetName(), i.GetType()); + } + return result; +} + +std::vector TTestColumn::CropSchema(const std::vector& input, const ui32 size) { + AFL_VERIFY(input.size() >= size); + return std::vector(input.begin(), input.begin() + size); +} + +} + +namespace NKikimr::NArrow { + +std::vector> MakeArrowFields(const std::vector& columns, const std::set& notNullColumns /*= {}*/) { + auto result = MakeArrowFields(NTest::TTestColumn::ConvertToPairs(columns), notNullColumns); + UNIT_ASSERT_C(result.ok(), result.status().ToString()); + return result.ValueUnsafe(); +} + +std::shared_ptr MakeArrowSchema(const std::vector& columns, const std::set& 
notNullColumns /*= {}*/) { + auto result = MakeArrowSchema(NTest::TTestColumn::ConvertToPairs(columns), notNullColumns); + UNIT_ASSERT_C(result.ok(), result.status().ToString()); + return result.ValueUnsafe(); +} + +} + +namespace NKikimr::NOlap { + +std::shared_ptr TTestStoragesManager::DoBuildOperator(const TString& storageId) { + if (storageId == TBase::DefaultStorageId) { + return std::make_shared(storageId, NActors::TActorId(), TabletInfo, + 1, SharedBlobsManager->GetStorageManagerGuarantee(TBase::DefaultStorageId)); + } else if (storageId == TBase::MemoryStorageId) { + Singleton()->SetSecretKey("fakeSecret"); + return std::make_shared(storageId, NActors::TActorId(), std::make_shared("fakeBucket", "fakeSecret"), + SharedBlobsManager->GetStorageManagerGuarantee(storageId)); + } else { + return nullptr; + } +} + +} \ No newline at end of file diff --git a/ydb/core/tx/columnshard/test_helper/helper.h b/ydb/core/tx/columnshard/test_helper/helper.h new file mode 100644 index 000000000000..4501cfe5fea4 --- /dev/null +++ b/ydb/core/tx/columnshard/test_helper/helper.h @@ -0,0 +1,75 @@ +#pragma once +#include +#include + +#include + +#include + +namespace NKikimrSchemeOp { +class TOlapColumnDescription; +} + +namespace NKikimr::NOlap { + +class TTestStoragesManager: public NOlap::IStoragesManager { +private: + using TBase = NOlap::IStoragesManager; + TIntrusivePtr TabletInfo = new TTabletStorageInfo(); + std::shared_ptr SharedBlobsManager = std::make_shared(NOlap::TTabletId(0)); +protected: + virtual bool DoLoadIdempotency(NTable::TDatabase& /*database*/) override { + return true; + } + + virtual std::shared_ptr DoBuildOperator(const TString& storageId) override; + virtual const std::shared_ptr& DoGetSharedBlobsManager() const override { + return SharedBlobsManager; + } +public: + + static std::shared_ptr GetInstance() { + static auto result = std::make_shared(); + static TMutex mutex; + static bool initialized = false; + TGuard g(mutex); + if (!initialized) { + 
result->Initialize(); + } + initialized = true; + return result; + } + +}; + +} + +namespace NKikimr::NArrow::NTest { + +class TTestColumn { +private: + YDB_ACCESSOR_DEF(TString, Name); + YDB_ACCESSOR_DEF(NScheme::TTypeInfo, Type); + YDB_ACCESSOR_DEF(TString, StorageId); +public: + explicit TTestColumn(const TString& name, const NScheme::TTypeInfo& type) + : Name(name) + , Type(type) { + + } + + NKikimrSchemeOp::TOlapColumnDescription CreateColumn(const ui32 id) const; + static std::vector> ConvertToPairs(const std::vector& columns); + static THashMap ConvertToHash(const std::vector& columns); + static std::vector BuildFromPairs(const std::vector>& columns); + static std::vector CropSchema(const std::vector& input, const ui32 size); +}; + +} + +namespace NKikimr::NArrow { + +std::vector> MakeArrowFields(const std::vector& columns, const std::set& notNullColumns = {}); +std::shared_ptr MakeArrowSchema(const std::vector& columns, const std::set& notNullColumns = {}); + +} diff --git a/ydb/core/tx/columnshard/test_helper/ya.make b/ydb/core/tx/columnshard/test_helper/ya.make new file mode 100644 index 000000000000..04b99f3aee70 --- /dev/null +++ b/ydb/core/tx/columnshard/test_helper/ya.make @@ -0,0 +1,20 @@ +LIBRARY() + +PEERDIR( + ydb/core/protos + contrib/libs/apache/arrow + ydb/library/actors/core + ydb/core/tx/columnshard/blobs_action/bs + ydb/core/tx/columnshard/blobs_action/tier + ydb/core/wrappers +) + +SRCS( + helper.cpp + controllers.cpp +) + +YQL_LAST_ABI_VERSION() + +END() + diff --git a/ydb/core/tx/columnshard/transactions/operators/backup.cpp b/ydb/core/tx/columnshard/transactions/operators/backup.cpp new file mode 100644 index 000000000000..f55d2b82b0b3 --- /dev/null +++ b/ydb/core/tx/columnshard/transactions/operators/backup.cpp @@ -0,0 +1,67 @@ +#include "backup.h" +#include +#include + +namespace NKikimr::NColumnShard { + +bool TBackupTransactionOperator::Parse(const TString& data) { + NKikimrTxColumnShard::TBackupTxBody txBody; + if 
(!txBody.ParseFromString(data)) { + return false; + } + if (!txBody.HasBackupTask()) { + return false; + } + TConclusion id = NOlap::NExport::TIdentifier::BuildFromProto(txBody); + if (!id) { + AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("event", "cannot_parse_id")("problem", id.GetErrorMessage()); + return false; + } + TConclusion selector = NOlap::NExport::TSelectorContainer::BuildFromProto(txBody); + if (!selector) { + AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("event", "cannot_parse_selector")("problem", selector.GetErrorMessage()); + return false; + } + TConclusion storeInitializer = NOlap::NExport::TStorageInitializerContainer::BuildFromProto(txBody); + if (!storeInitializer) { + AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("event", "cannot_parse_selector")("problem", storeInitializer.GetErrorMessage()); + return false; + } + NArrow::NSerialization::TSerializerContainer serializer(std::make_shared()); + ExportTask = std::make_shared(id.DetachResult(), selector.DetachResult(), storeInitializer.DetachResult(), serializer); + return true; +} + +TBackupTransactionOperator::TProposeResult TBackupTransactionOperator::Propose(TColumnShard& owner, NTabletFlatExecutor::TTransactionContext& /*txc*/, bool /*proposed*/) const { + auto proposition = owner.GetExportsManager()->ProposeTask(ExportTask); + if (!proposition) { + return TProposeResult(NKikimrTxColumnShard::EResultStatus::ERROR, + TStringBuilder() << "Invalid backup task TxId# " << GetTxId() << ": " << ExportTask->DebugString() << ": " << proposition.GetErrorMessage()); + } + return TProposeResult(); +} + +bool TBackupTransactionOperator::Progress(TColumnShard& owner, const NOlap::TSnapshot& version, NTabletFlatExecutor::TTransactionContext& txc) { + Y_UNUSED(version); + AFL_VERIFY(ExportTask); + owner.GetExportsManager()->ConfirmSessionOnExecute(ExportTask->GetIdentifier(), txc); + return true; +} + +bool TBackupTransactionOperator::Complete(TColumnShard& owner, const TActorContext& ctx) { + 
AFL_VERIFY(ExportTask); + owner.GetExportsManager()->ConfirmSessionOnComplete(ExportTask->GetIdentifier()); + auto result = std::make_unique( + owner.TabletID(), TxInfo.TxKind, GetTxId(), NKikimrTxColumnShard::SUCCESS); + result->Record.SetStep(TxInfo.PlanStep); + ctx.Send(TxInfo.Source, result.release(), 0, TxInfo.Cookie); + AFL_VERIFY(owner.GetExportsManager()->GetSessionVerified(ExportTask->GetIdentifier())->Start(owner.GetStoragesManager(), (NOlap::TTabletId)owner.TabletID(), owner.SelfId())); + return true; +} + +bool TBackupTransactionOperator::Abort(TColumnShard& owner, NTabletFlatExecutor::TTransactionContext& txc) { + owner.GetExportsManager()->RemoveSession(ExportTask->GetIdentifier(), txc); + return true; +} + +} diff --git a/ydb/core/tx/columnshard/transactions/operators/backup.h b/ydb/core/tx/columnshard/transactions/operators/backup.h new file mode 100644 index 000000000000..bc873e3dbda0 --- /dev/null +++ b/ydb/core/tx/columnshard/transactions/operators/backup.h @@ -0,0 +1,29 @@ +#pragma once + +#include +#include +#include + +namespace NKikimr::NColumnShard { + + class TBackupTransactionOperator : public TTxController::ITransactionOperatior { + private: + std::shared_ptr ExportTask; + using TBase = TTxController::ITransactionOperatior; + using TProposeResult = TTxController::TProposeResult; + static inline auto Registrator = TFactory::TRegistrator(NKikimrTxColumnShard::TX_KIND_BACKUP); + public: + using TBase::TBase; + + virtual bool Parse(const TString& data) override; + + virtual TProposeResult Propose(TColumnShard& owner, NTabletFlatExecutor::TTransactionContext& txc, bool /*proposed*/) const override; + + virtual bool Progress(TColumnShard& owner, const NOlap::TSnapshot& version, NTabletFlatExecutor::TTransactionContext& txc) override; + + virtual bool Complete(TColumnShard& owner, const TActorContext& ctx) override; + + virtual bool Abort(TColumnShard& owner, NTabletFlatExecutor::TTransactionContext& txc) override; + }; + +} diff --git 
a/ydb/core/tx/columnshard/transactions/operators/ev_write.h b/ydb/core/tx/columnshard/transactions/operators/ev_write.h index b31f381df529..ca83ce8696ac 100644 --- a/ydb/core/tx/columnshard/transactions/operators/ev_write.h +++ b/ydb/core/tx/columnshard/transactions/operators/ev_write.h @@ -12,11 +12,16 @@ namespace NKikimr::NColumnShard { using TBase::TBase; virtual bool Parse(const TString& data) override { - Y_UNUSED(data); - return true; + NKikimrTxColumnShard::TCommitWriteTxBody commitTxBody; + if (!commitTxBody.ParseFromString(data)) { + return false; + } + LockId = commitTxBody.GetLockId(); + return !!LockId; } - TProposeResult Propose(TColumnShard& /*owner*/, NTabletFlatExecutor::TTransactionContext& /*txc*/, bool /*proposed*/) const override { + TProposeResult Propose(TColumnShard& owner, NTabletFlatExecutor::TTransactionContext& txc, bool /*proposed*/) const override { + owner.OperationsManager->LinkTransaction(LockId, GetTxId(), txc); return TProposeResult(); } @@ -25,7 +30,7 @@ namespace NKikimr::NColumnShard { } virtual bool Complete(TColumnShard& owner, const TActorContext& ctx) override { - auto result = NEvents::TDataEvents::TEvWriteResult::BuildCommited(owner.TabletID(), GetTxId()); + auto result = NEvents::TDataEvents::TEvWriteResult::BuildCompleted(owner.TabletID(), GetTxId()); ctx.Send(TxInfo.Source, result.release(), 0, TxInfo.Cookie); return true; } @@ -33,6 +38,8 @@ namespace NKikimr::NColumnShard { virtual bool Abort(TColumnShard& owner, NTabletFlatExecutor::TTransactionContext& txc) override { return owner.OperationsManager->AbortTransaction(owner, GetTxId(), txc); } + private: + ui64 LockId = 0; }; } diff --git a/ydb/core/tx/columnshard/transactions/operators/schema.h b/ydb/core/tx/columnshard/transactions/operators/schema.h index f71da39fa110..58bb89c8c0f8 100644 --- a/ydb/core/tx/columnshard/transactions/operators/schema.h +++ b/ydb/core/tx/columnshard/transactions/operators/schema.h @@ -25,14 +25,18 @@ namespace NKikimr::NColumnShard { 
TProposeResult Propose(TColumnShard& owner, NTabletFlatExecutor::TTransactionContext& txc, bool /*proposed*/) const override { switch (SchemaTxBody.TxBody_case()) { case NKikimrTxColumnShard::TSchemaTxBody::kInitShard: + { + auto validationStatus = ValidateTables(SchemaTxBody.GetInitShard().GetTables()); + if (validationStatus.IsFail()) { + return TProposeResult(NKikimrTxColumnShard::EResultStatus::SCHEMA_ERROR, "Invalid schema: " + validationStatus.GetErrorMessage()); + } + } break; case NKikimrTxColumnShard::TSchemaTxBody::kEnsureTables: - for (auto& table : SchemaTxBody.GetEnsureTables().GetTables()) { - if (table.HasSchemaPreset() && !ValidateTablePreset(table.GetSchemaPreset())) { - return TProposeResult(NKikimrTxColumnShard::EResultStatus::ERROR, "Invalid schema"); - } - if (table.HasSchema() && !ValidateTableSchema(table.GetSchema())) { - return TProposeResult(NKikimrTxColumnShard::EResultStatus::ERROR, "Invalid schema"); + { + auto validationStatus = ValidateTables(SchemaTxBody.GetEnsureTables().GetTables()); + if (validationStatus.IsFail()) { + return TProposeResult(NKikimrTxColumnShard::EResultStatus::SCHEMA_ERROR, "Invalid schema: " + validationStatus.GetErrorMessage()); } } break; @@ -91,10 +95,26 @@ namespace NKikimr::NColumnShard { } private: - bool ValidateTableSchema(const NKikimrSchemeOp::TColumnTableSchema& schema) const { - namespace NTypeIds = NScheme::NTypeIds; + TConclusionStatus ValidateTables(::google::protobuf::RepeatedPtrField<::NKikimrTxColumnShard::TCreateTable> tables) const { + for (auto& table : tables) { + if (table.HasSchemaPreset()) { + const auto validationStatus = ValidateTablePreset(table.GetSchemaPreset()); + if (validationStatus.IsFail()) { + return validationStatus; + } + } + if (table.HasSchema()) { + const auto validationStatus = ValidateTableSchema(table.GetSchema()); + if (validationStatus.IsFail()) { + return validationStatus; + } + } + } return TConclusionStatus::Success(); + } - static const THashSet supportedTypes = { 
+ TConclusionStatus ValidateTableSchema(const NKikimrSchemeOp::TColumnTableSchema& schema) const { + namespace NTypeIds = NScheme::NTypeIds; + static const THashSet pkSupportedTypes = { NTypeIds::Timestamp, NTypeIds::Int8, NTypeIds::Int16, @@ -113,41 +133,47 @@ namespace NKikimr::NColumnShard { NTypeIds::Utf8, NTypeIds::Decimal }; - if (!schema.HasEngine() || schema.GetEngine() != NKikimrSchemeOp::EColumnTableEngine::COLUMN_ENGINE_REPLACING_TIMESERIES) { - return false; + return TConclusionStatus::Fail("Invalid scheme engine: " + (schema.HasEngine() ? NKikimrSchemeOp::EColumnTableEngine_Name(schema.GetEngine()) : TString("No"))); } if (!schema.KeyColumnNamesSize()) { - return false; + return TConclusionStatus::Fail("There is no key columns"); } - TString firstKeyColumn = schema.GetKeyColumnNames()[0]; THashSet keyColumns(schema.GetKeyColumnNames().begin(), schema.GetKeyColumnNames().end()); - + TVector columnErrors; for (const NKikimrSchemeOp::TOlapColumnDescription& column : schema.GetColumns()) { TString name = column.GetName(); - /* - if (column.GetNotNull() && keyColumns.contains(name)) { - return false; + void* typeDescr = nullptr; + if (column.GetTypeId() == NTypeIds::Pg && column.HasTypeInfo()) { + typeDescr = NPg::TypeDescFromPgTypeId(column.GetTypeInfo().GetPgTypeId()); + } + + NScheme::TTypeInfo schemeType(column.GetTypeId(), typeDescr); + if (keyColumns.contains(name) && !pkSupportedTypes.contains(column.GetTypeId())) { + columnErrors.emplace_back("key column " + name + " has unsupported type " + column.GetTypeName()); } - */ - if (name == firstKeyColumn && !supportedTypes.contains(column.GetTypeId())) { - return false; + auto arrowType = NArrow::GetArrowType(schemeType); + if (!arrowType.ok()) { + columnErrors.emplace_back("column " + name + ": " + arrowType.status().ToString()); } keyColumns.erase(name); } + if (!columnErrors.empty()) { + return TConclusionStatus::Fail("Column errors: " + JoinSeq("; ", columnErrors)); + } if (!keyColumns.empty()) { - 
return false; + return TConclusionStatus::Fail("Key columns not in scheme: " + JoinSeq(", ", keyColumns)); } - return true; + return TConclusionStatus::Success(); } - bool ValidateTablePreset(const NKikimrSchemeOp::TColumnTableSchemaPreset& preset) const { + TConclusionStatus ValidateTablePreset(const NKikimrSchemeOp::TColumnTableSchemaPreset& preset) const { if (preset.HasName() && preset.GetName() != "default") { - return false; + return TConclusionStatus::Fail("Preset name must be empty or 'default', but '" + preset.GetName() + "' got"); } return ValidateTableSchema(preset.GetSchema()); } diff --git a/ydb/core/tx/columnshard/transactions/operators/ya.make b/ydb/core/tx/columnshard/transactions/operators/ya.make index 449ec29a137d..c412503159f6 100644 --- a/ydb/core/tx/columnshard/transactions/operators/ya.make +++ b/ydb/core/tx/columnshard/transactions/operators/ya.make @@ -4,10 +4,13 @@ SRCS( GLOBAL schema.cpp GLOBAL long_tx_write.cpp GLOBAL ev_write.cpp + GLOBAL backup.cpp ) PEERDIR( ydb/core/tx/columnshard/transactions + ydb/core/tx/columnshard/data_sharing/destination/events + ydb/core/tx/columnshard/export/manager ) END() diff --git a/ydb/core/tx/columnshard/transactions/propose_transaction_base.cpp b/ydb/core/tx/columnshard/transactions/propose_transaction_base.cpp new file mode 100644 index 000000000000..ef50e2ae5959 --- /dev/null +++ b/ydb/core/tx/columnshard/transactions/propose_transaction_base.cpp @@ -0,0 +1,37 @@ +#include "propose_transaction_base.h" + +#include + + +namespace NKikimr::NColumnShard { + + void TProposeTransactionBase::ProposeTransaction(const TTxController::TBasicTxInfo& txInfo, const TString& txBody, const TActorId source, const ui64 cookie, TTransactionContext& txc) { + auto txOperator = TTxController::ITransactionOperatior::TFactory::MakeHolder(txInfo.TxKind, TTxController::TTxInfo(txInfo.TxKind, txInfo.TxId)); + if (!txOperator || !txOperator->Parse(txBody)) { + TTxController::TProposeResult 
proposeResult(NKikimrTxColumnShard::EResultStatus::ERROR, TStringBuilder() << "Error processing commit TxId# " << txInfo.TxId + << (txOperator ? ". Parsing error " : ". Unknown operator for txKind")); + OnProposeError(proposeResult, txInfo); + return; + } + + auto txInfoPtr = Self->GetProgressTxController().GetTxInfo(txInfo.TxId); + if (!!txInfoPtr) { + if (txInfoPtr->Source != source || txInfoPtr->Cookie != cookie) { + TTxController::TProposeResult proposeResult(NKikimrTxColumnShard::EResultStatus::ERROR, TStringBuilder() << "Another commit TxId# " << txInfo.TxId << " has already been proposed"); + OnProposeError(proposeResult, txInfo); + } + TTxController::TProposeResult proposeResult; + OnProposeResult(proposeResult, *txInfoPtr); + } else { + auto proposeResult = txOperator->Propose(*Self, txc, false); + if (!!proposeResult) { + const auto fullTxInfo = txOperator->TxWithDeadline() ? Self->GetProgressTxController().RegisterTxWithDeadline(txInfo.TxId, txInfo.TxKind, txBody, source, cookie, txc) + : Self->GetProgressTxController().RegisterTx(txInfo.TxId, txInfo.TxKind, txBody, source, cookie, txc); + + OnProposeResult(proposeResult, fullTxInfo); + } else { + OnProposeError(proposeResult, txInfo); + } + } + } +} diff --git a/ydb/core/tx/columnshard/transactions/propose_transaction_base.h b/ydb/core/tx/columnshard/transactions/propose_transaction_base.h new file mode 100644 index 000000000000..7657c4aff475 --- /dev/null +++ b/ydb/core/tx/columnshard/transactions/propose_transaction_base.h @@ -0,0 +1,22 @@ +#pragma once +#include "tx_controller.h" + +namespace NKikimr::NColumnShard { + +class TColumnShard; + +class TProposeTransactionBase : public NTabletFlatExecutor::TTransactionBase { +public: + TProposeTransactionBase(TColumnShard* self) + : TBase(self) + {} + +protected: + void ProposeTransaction(const TTxController::TBasicTxInfo& txInfo, const TString& txBody, const TActorId source, const ui64 cookie, TTransactionContext& txc); + + virtual void 
OnProposeResult(TTxController::TProposeResult& proposeResult, const TTxController::TTxInfo& txInfo) = 0; + virtual void OnProposeError(TTxController::TProposeResult& proposeResult, const TTxController::TBasicTxInfo& txInfo) = 0; +}; + + +} diff --git a/ydb/core/tx/columnshard/transactions/tx_controller.cpp b/ydb/core/tx/columnshard/transactions/tx_controller.cpp index 1137456d3d69..43a56ecf348e 100644 --- a/ydb/core/tx/columnshard/transactions/tx_controller.cpp +++ b/ydb/core/tx/columnshard/transactions/tx_controller.cpp @@ -23,7 +23,7 @@ ui64 TTxController::GetAllowedStep() const { } ui64 TTxController::GetMemoryUsage() const { - return BasicTxInfo.size() * sizeof(TBasicTxInfo) + + return BasicTxInfo.size() * sizeof(TTxInfo) + DeadlineQueue.size() * sizeof(TPlanQueueItem) + (PlanQueue.size() + RunningQueue.size()) * sizeof(TPlanQueueItem); } @@ -45,9 +45,11 @@ bool TTxController::Load(NTabletFlatExecutor::TTransactionContext& txc) { return false; while (!rowset.EndOfSet()) { - ui64 txId = rowset.GetValue(); - auto& txInfo = BasicTxInfo[txId]; - txInfo.TxId = txId; + const ui64 txId = rowset.GetValue(); + const NKikimrTxColumnShard::ETransactionKind txKind = rowset.GetValue(); + + auto txInfoIt = BasicTxInfo.emplace(txId, TTxInfo(txKind, txId)).first; + auto& txInfo = txInfoIt->second; txInfo.MaxStep = rowset.GetValue(); if (txInfo.MaxStep != Max()) { txInfo.MinStep = txInfo.MaxStep - MaxCommitTxDelay.MilliSeconds(); @@ -55,7 +57,6 @@ bool TTxController::Load(NTabletFlatExecutor::TTransactionContext& txc) { txInfo.PlanStep = rowset.GetValueOrDefault(0); txInfo.Source = rowset.GetValue(); txInfo.Cookie = rowset.GetValue(); - txInfo.TxKind = rowset.GetValue(); if (txInfo.PlanStep != 0) { PlanQueue.emplace(txInfo.PlanStep, txInfo.TxId); @@ -90,12 +91,11 @@ TTxController::ITransactionOperatior::TPtr TTxController::GetVerifiedTxOperator( return it->second; } -const TTxController::TBasicTxInfo& TTxController::RegisterTx(const ui64 txId, const 
NKikimrTxColumnShard::ETransactionKind& txKind, const TString& txBody, const TActorId& source, const ui64 cookie, NTabletFlatExecutor::TTransactionContext& txc) { +TTxController::TTxInfo TTxController::RegisterTx(const ui64 txId, const NKikimrTxColumnShard::ETransactionKind& txKind, const TString& txBody, const TActorId& source, const ui64 cookie, NTabletFlatExecutor::TTransactionContext& txc) { NIceDb::TNiceDb db(txc.DB); - auto& txInfo = BasicTxInfo[txId]; - txInfo.TxId = txId; - txInfo.TxKind = txKind; + auto txInfoIt = BasicTxInfo.emplace(txId, TTxInfo(txKind, txId)).first; + auto& txInfo = txInfoIt->second; txInfo.Source = source; txInfo.Cookie = cookie; @@ -108,12 +108,11 @@ const TTxController::TBasicTxInfo& TTxController::RegisterTx(const ui64 txId, co return txInfo; } -const TTxController::TBasicTxInfo& TTxController::RegisterTxWithDeadline(const ui64 txId, const NKikimrTxColumnShard::ETransactionKind& txKind, const TString& txBody, const TActorId& source, const ui64 cookie, NTabletFlatExecutor::TTransactionContext& txc) { +TTxController::TTxInfo TTxController::RegisterTxWithDeadline(const ui64 txId, const NKikimrTxColumnShard::ETransactionKind& txKind, const TString& txBody, const TActorId& source, const ui64 cookie, NTabletFlatExecutor::TTransactionContext& txc) { NIceDb::TNiceDb db(txc.DB); - auto& txInfo = BasicTxInfo[txId]; - txInfo.TxId = txId; - txInfo.TxKind = txKind; + auto txInfoIt = BasicTxInfo.emplace(txId, TTxInfo(txKind, txId)).first; + auto& txInfo = txInfoIt->second; txInfo.Source = source; txInfo.Cookie = cookie; txInfo.MinStep = GetAllowedStep(); @@ -174,7 +173,7 @@ bool TTxController::CancelTx(const ui64 txId, NTabletFlatExecutor::TTransactionC return true; } -std::optional TTxController::StartPlannedTx() { +std::optional TTxController::StartPlannedTx() { if (!PlanQueue.empty()) { auto node = PlanQueue.extract(PlanQueue.begin()); auto& item = node.value(); @@ -196,7 +195,7 @@ void TTxController::FinishPlannedTx(const ui64 txId, 
NTabletFlatExecutor::TTrans } void TTxController::CompleteRunningTx(const TPlanQueueItem& txItem) { - RunningQueue.erase(txItem); + AFL_VERIFY(RunningQueue.erase(txItem))("info", txItem.DebugString()); } std::optional TTxController::GetPlannedTx() const { @@ -206,17 +205,19 @@ std::optional TTxController::GetPlannedTx() const return *PlanQueue.begin(); } -const TTxController::TBasicTxInfo* TTxController::GetTxInfo(const ui64 txId) const { - return BasicTxInfo.FindPtr(txId); +std::optional TTxController::GetTxInfo(const ui64 txId) const { + auto txPtr = BasicTxInfo.FindPtr(txId); + if (txPtr) { + return *txPtr; + } + return std::nullopt; } -NEvents::TDataEvents::TCoordinatorInfo TTxController::GetCoordinatorInfo(const ui64 txId) const { - auto txInfo = BasicTxInfo.FindPtr(txId); - Y_ABORT_UNLESS(txInfo); +NEvents::TDataEvents::TCoordinatorInfo TTxController::BuildCoordinatorInfo(const TTxInfo& txInfo) const { if (Owner.ProcessingParams) { - return NEvents::TDataEvents::TCoordinatorInfo(txInfo->MinStep, txInfo->MaxStep, Owner.ProcessingParams->GetCoordinators()); + return NEvents::TDataEvents::TCoordinatorInfo(txInfo.MinStep, txInfo.MaxStep, Owner.ProcessingParams->GetCoordinators()); } - return NEvents::TDataEvents::TCoordinatorInfo(txInfo->MinStep, txInfo->MaxStep, {}); + return NEvents::TDataEvents::TCoordinatorInfo(txInfo.MinStep, txInfo.MaxStep, {}); } size_t TTxController::CleanExpiredTxs(NTabletFlatExecutor::TTransactionContext& txc) { @@ -239,6 +240,19 @@ size_t TTxController::CleanExpiredTxs(NTabletFlatExecutor::TTransactionContext& return removedCount; } +TDuration TTxController::GetTxCompleteLag(ui64 timecastStep) const { + if (PlanQueue.empty()) { + return TDuration::Zero(); + } + + ui64 currentStep = PlanQueue.begin()->Step; + if (timecastStep > currentStep) { + return TDuration::MilliSeconds(timecastStep - currentStep); + } + + return TDuration::Zero(); +} + TTxController::EPlanResult TTxController::PlanTx(const ui64 planStep, const ui64 txId, 
NTabletFlatExecutor::TTransactionContext& txc) { auto it = BasicTxInfo.find(txId); if (it == BasicTxInfo.end()) { diff --git a/ydb/core/tx/columnshard/transactions/tx_controller.h b/ydb/core/tx/columnshard/transactions/tx_controller.h index fb38fae9d32e..d48bd504a1fb 100644 --- a/ydb/core/tx/columnshard/transactions/tx_controller.h +++ b/ydb/core/tx/columnshard/transactions/tx_controller.h @@ -24,16 +24,32 @@ class TTxController { inline bool operator<(const TPlanQueueItem& rhs) const { return Step < rhs.Step || (Step == rhs.Step && TxId < rhs.TxId); } + + TString DebugString() const { + return TStringBuilder() << "step=" << Step << ";txId=" << TxId << ";"; + } }; struct TBasicTxInfo { - ui64 TxId; + const NKikimrTxColumnShard::ETransactionKind TxKind; + const ui64 TxId; + public: + TBasicTxInfo(const NKikimrTxColumnShard::ETransactionKind& txKind, const ui64 txId) + : TxKind(txKind) + , TxId(txId) + {} + }; + + struct TTxInfo : public TBasicTxInfo { ui64 MaxStep = Max(); ui64 MinStep = 0; ui64 PlanStep = 0; TActorId Source; ui64 Cookie = 0; - NKikimrTxColumnShard::ETransactionKind TxKind; + public: + TTxInfo(const NKikimrTxColumnShard::ETransactionKind& txKind, const ui64 txId) + : TBasicTxInfo(txKind, txId) + {} }; class TProposeResult { @@ -47,18 +63,22 @@ class TTxController { {} bool operator!() const { - return Status != NKikimrTxColumnShard::EResultStatus::PREPARED; + return Status != NKikimrTxColumnShard::EResultStatus::PREPARED && Status != NKikimrTxColumnShard::EResultStatus::SUCCESS; + } + + TString DebugString() const { + return TStringBuilder() << "status=" << (ui64) Status << ";message=" << StatusMessage; } }; class ITransactionOperatior { protected: - TBasicTxInfo TxInfo; + TTxInfo TxInfo; public: using TPtr = std::shared_ptr; - using TFactory = NObjectFactory::TParametrizedObjectFactory; + using TFactory = NObjectFactory::TParametrizedObjectFactory; - ITransactionOperatior(const TBasicTxInfo& txInfo) + ITransactionOperatior(const TTxInfo& txInfo) : 
TxInfo(txInfo) {} @@ -87,7 +107,7 @@ class TTxController { private: const TDuration MaxCommitTxDelay = TDuration::Seconds(30); TColumnShard& Owner; - THashMap BasicTxInfo; + THashMap BasicTxInfo; std::set DeadlineQueue; std::set PlanQueue; std::set RunningQueue; @@ -109,21 +129,22 @@ class TTxController { bool Load(NTabletFlatExecutor::TTransactionContext& txc); - const TBasicTxInfo& RegisterTx(const ui64 txId, const NKikimrTxColumnShard::ETransactionKind& txKind, const TString& txBody, const TActorId& source, const ui64 cookie, NTabletFlatExecutor::TTransactionContext& txc); - const TBasicTxInfo& RegisterTxWithDeadline(const ui64 txId, const NKikimrTxColumnShard::ETransactionKind& txKind, const TString& txBody, const TActorId& source, const ui64 cookie, NTabletFlatExecutor::TTransactionContext& txc); + TTxInfo RegisterTx(const ui64 txId, const NKikimrTxColumnShard::ETransactionKind& txKind, const TString& txBody, const TActorId& source, const ui64 cookie, NTabletFlatExecutor::TTransactionContext& txc); + TTxInfo RegisterTxWithDeadline(const ui64 txId, const NKikimrTxColumnShard::ETransactionKind& txKind, const TString& txBody, const TActorId& source, const ui64 cookie, NTabletFlatExecutor::TTransactionContext& txc); bool CancelTx(const ui64 txId, NTabletFlatExecutor::TTransactionContext& txc); - std::optional StartPlannedTx(); + std::optional StartPlannedTx(); void FinishPlannedTx(const ui64 txId, NTabletFlatExecutor::TTransactionContext& txc); void CompleteRunningTx(const TPlanQueueItem& tx); std::optional GetPlannedTx() const; TPlanQueueItem GetFrontTx() const; - const TBasicTxInfo* GetTxInfo(const ui64 txId) const; - NEvents::TDataEvents::TCoordinatorInfo GetCoordinatorInfo(const ui64 txId) const; + std::optional GetTxInfo(const ui64 txId) const; + NEvents::TDataEvents::TCoordinatorInfo BuildCoordinatorInfo(const TTxInfo& txInfo) const; size_t CleanExpiredTxs(NTabletFlatExecutor::TTransactionContext& txc); + TDuration GetTxCompleteLag(ui64 timecastStep) const; 
enum class EPlanResult { Skipped, diff --git a/ydb/core/tx/columnshard/transactions/ya.make b/ydb/core/tx/columnshard/transactions/ya.make index 17f70379bb89..9994787f2fe4 100644 --- a/ydb/core/tx/columnshard/transactions/ya.make +++ b/ydb/core/tx/columnshard/transactions/ya.make @@ -2,11 +2,13 @@ LIBRARY() SRCS( tx_controller.cpp + propose_transaction_base.cpp ) PEERDIR( ydb/core/tablet_flat ydb/core/tx/data_events + ydb/core/tx/columnshard/data_sharing/destination/events ) IF (OS_WINDOWS) diff --git a/ydb/core/tx/columnshard/ut_rw/ut_backup.cpp b/ydb/core/tx/columnshard/ut_rw/ut_backup.cpp new file mode 100644 index 000000000000..377b9de19ced --- /dev/null +++ b/ydb/core/tx/columnshard/ut_rw/ut_backup.cpp @@ -0,0 +1,112 @@ +#include "columnshard_ut_common.h" + +#include +#include +#include + +#include +#include + + +namespace NKikimr { + +using namespace NColumnShard; +using namespace NTxUT; + +Y_UNIT_TEST_SUITE(Backup) { + + bool ProposeTx(TTestBasicRuntime& runtime, TActorId& sender, NKikimrTxColumnShard::ETransactionKind txKind, const TString& txBody, const ui64 txId) { + auto event = std::make_unique( + txKind, sender, txId, txBody); + + ForwardToTablet(runtime, TTestTxConfig::TxTablet0, sender, event.release()); + auto ev = runtime.GrabEdgeEvent(sender); + const auto& res = ev->Get()->Record; + UNIT_ASSERT_EQUAL(res.GetTxId(), txId); + UNIT_ASSERT_EQUAL(res.GetTxKind(), txKind); + return (res.GetStatus() == NKikimrTxColumnShard::PREPARED); + } + + void PlanTx(TTestBasicRuntime& runtime, TActorId& sender, NKikimrTxColumnShard::ETransactionKind txKind, NOlap::TSnapshot snap, bool waitResult = true) { + auto plan = std::make_unique(snap.GetPlanStep(), 0, TTestTxConfig::TxTablet0); + auto tx = plan->Record.AddTransactions(); + tx->SetTxId(snap.GetTxId()); + ActorIdToProto(sender, tx->MutableAckTo()); + ForwardToTablet(runtime, TTestTxConfig::TxTablet0, sender, plan.release()); + + UNIT_ASSERT(runtime.GrabEdgeEvent(sender)); + if (waitResult) { + auto ev = 
runtime.GrabEdgeEvent(sender); + const auto& res = ev->Get()->Record; + UNIT_ASSERT_EQUAL(res.GetTxId(), snap.GetTxId()); + UNIT_ASSERT_EQUAL(res.GetTxKind(), txKind); + UNIT_ASSERT_EQUAL(res.GetStatus(), NKikimrTxColumnShard::SUCCESS); + } + } + + template + void TestWaitCondition(TTestBasicRuntime& runtime, const TString& title, const TChecker& checker, const TDuration d = TDuration::Seconds(10)) { + const TInstant start = TInstant::Now(); + while (TInstant::Now() - start < d && !checker()) { + Cerr << "waiting " << title << Endl; + runtime.SimulateSleep(TDuration::Seconds(1)); + } + AFL_VERIFY(checker()); + } + + Y_UNIT_TEST(ProposeBackup) { + TTestBasicRuntime runtime; + TTester::Setup(runtime); + + const ui64 tableId = 1; + const std::vector schema = { + NArrow::NTest::TTestColumn("key1", TTypeInfo(NTypeIds::Uint64)), + NArrow::NTest::TTestColumn("key2", TTypeInfo(NTypeIds::Uint64)), + NArrow::NTest::TTestColumn("field", TTypeInfo(NTypeIds::Utf8) ) + }; + auto csControllerGuard = NKikimr::NYDBTest::TControllers::RegisterCSControllerGuard(); + PrepareTablet(runtime, tableId, schema, 2); + ui64 txId = 111; + ui64 planStep = 1000000000; // greater then delays + + ui64 writeId = 1; + + TActorId sender = runtime.AllocateEdgeActor(); + + { + std::vector writeIds; + UNIT_ASSERT(WriteData(runtime, sender, writeId++, tableId, MakeTestBlob({0, 100}, schema), schema, true, &writeIds)); + ProposeCommit(runtime, sender, ++txId, writeIds); + PlanCommit(runtime, sender, ++planStep, txId); + } + + const ui32 start = csControllerGuard->GetInsertStartedCounter().Val(); + TestWaitCondition(runtime, "insert compacted", + [&]() { + ++writeId; + std::vector writeIds; + WriteData(runtime, sender, writeId, tableId, MakeTestBlob({writeId * 100, (writeId + 1) * 100}, schema), schema, true, &writeIds); + ProposeCommit(runtime, sender, ++txId, writeIds); + PlanCommit(runtime, sender, ++planStep, txId); + return csControllerGuard->GetInsertStartedCounter().Val() > start + 1; + }, 
TDuration::Seconds(1000)); + + NKikimrTxColumnShard::TBackupTxBody txBody; + NOlap::TSnapshot backupSnapshot(planStep, txId); + txBody.MutableBackupTask()->SetTableName("abcde"); + txBody.MutableBackupTask()->SetTableId(tableId); + txBody.MutableBackupTask()->SetSnapshotStep(backupSnapshot.GetPlanStep()); + txBody.MutableBackupTask()->SetSnapshotTxId(backupSnapshot.GetTxId()); + txBody.MutableBackupTask()->MutableS3Settings()->SetEndpoint("fake"); + txBody.MutableBackupTask()->MutableS3Settings()->SetSecretKey("fakeSecret"); + UNIT_ASSERT(ProposeTx(runtime, sender, NKikimrTxColumnShard::TX_KIND_BACKUP, txBody.SerializeAsString(), ++txId)); + AFL_VERIFY(csControllerGuard->GetFinishedExportsCount() == 0); + PlanTx(runtime, sender, NKikimrTxColumnShard::TX_KIND_BACKUP, NOlap::TSnapshot(++planStep, txId)); + TestWaitCondition(runtime, "export", + []() {return Singleton()->GetSize(); }); + TestWaitCondition(runtime, "finish", + [&]() {return csControllerGuard->GetFinishedExportsCount() == 1; }); + } +} + +} // namespace NKikimr diff --git a/ydb/core/tx/columnshard/ut_rw/ut_columnshard_read_write.cpp b/ydb/core/tx/columnshard/ut_rw/ut_columnshard_read_write.cpp index 5d717773d30d..e1bc10e38fb0 100644 --- a/ydb/core/tx/columnshard/ut_rw/ut_columnshard_read_write.cpp +++ b/ydb/core/tx/columnshard/ut_rw/ut_columnshard_read_write.cpp @@ -7,10 +7,11 @@ #include #include #include -#include +#include #include #include #include +#include #include #include #include @@ -33,12 +34,10 @@ using TTypeInfo = NScheme::TTypeInfo; using TDefaultTestsController = NKikimr::NYDBTest::NColumnShard::TController; class TDisableCompactionController: public NKikimr::NYDBTest::NColumnShard::TController { -protected: - virtual bool DoOnStartCompaction(std::shared_ptr& changes) { - changes = nullptr; - return true; - } public: + TDisableCompactionController() { + DisableBackground(NKikimr::NYDBTest::ICSController::EBackground::Compaction); + } }; template @@ -338,6 +337,7 @@ bool CheckColumns(const 
std::shared_ptr& batch, const std::v void TestWrite(const TestTableDescription& table) { TTestBasicRuntime runtime; TTester::Setup(runtime); + auto csDefaultControllerGuard = NKikimr::NYDBTest::TControllers::RegisterCSControllerGuard(); TActorId sender = runtime.AllocateEdgeActor(); CreateTestBootstrapper(runtime, CreateTestTabletInfo(TTestTxConfig::TxTablet0, TTabletTypes::ColumnShard), &CreateColumnShard); @@ -352,12 +352,12 @@ void TestWrite(const TestTableDescription& table) { SetupSchema(runtime, sender, tableId, table); - const std::vector>& ydbSchema = table.Schema; + const auto& ydbSchema = table.Schema; bool ok = WriteData(runtime, sender, writeId++, tableId, MakeTestBlob({0, 100}, ydbSchema), ydbSchema); UNIT_ASSERT(ok); - std::vector> schema = ydbSchema; + auto schema = ydbSchema; // no data @@ -373,7 +373,7 @@ void TestWrite(const TestTableDescription& table) { // missing columns - schema.resize(4); + schema = NArrow::NTest::TTestColumn::CropSchema(schema, 4); ok = WriteData(runtime, sender, writeId++, tableId, MakeTestBlob({0, 100}, schema), schema); UNIT_ASSERT(ok); @@ -381,7 +381,7 @@ void TestWrite(const TestTableDescription& table) { // It fails only if we specify source schema. No way to detect it from serialized batch data. 
schema = ydbSchema; - schema[0].second = TTypeInfo(NTypeIds::Int64); + schema[0].SetType(TTypeInfo(NTypeIds::Int64)); ok = WriteData(runtime, sender, writeId++, tableId, MakeTestBlob({0, 100}, schema), schema); UNIT_ASSERT(!ok); @@ -390,7 +390,7 @@ void TestWrite(const TestTableDescription& table) { for (size_t i = 0; i < ydbSchema.size(); ++i) { schema = ydbSchema; - schema[i].second = TTypeInfo(NTypeIds::Int8); + schema[i].SetType(TTypeInfo(NTypeIds::Int8)); ok = WriteData(runtime, sender, writeId++, tableId, MakeTestBlob({0, 100}, schema), schema); UNIT_ASSERT(!ok); } @@ -399,24 +399,24 @@ void TestWrite(const TestTableDescription& table) { for (size_t i = 0; i < ydbSchema.size(); ++i) { schema = ydbSchema; - schema[i].second = TTypeInfo(NTypeIds::Int64); + schema[i].SetType(TTypeInfo(NTypeIds::Int64)); ok = WriteData(runtime, sender, writeId++, tableId, MakeTestBlob({0, 100}, schema), schema); - UNIT_ASSERT(ok == (ydbSchema[i].second == TTypeInfo(NTypeIds::Int64))); + UNIT_ASSERT(ok == (ydbSchema[i].GetType() == TTypeInfo(NTypeIds::Int64))); } schema = ydbSchema; - schema[1].second = TTypeInfo(NTypeIds::Utf8); - schema[5].second = TTypeInfo(NTypeIds::Int32); + schema[1].SetType(TTypeInfo(NTypeIds::Utf8)); + schema[5].SetType(TTypeInfo(NTypeIds::Int32)); ok = WriteData(runtime, sender, writeId++, tableId, MakeTestBlob({0, 100}, schema), schema); UNIT_ASSERT(!ok); // reordered columns - THashMap remap(ydbSchema.begin(), ydbSchema.end()); + THashMap remap = NArrow::NTest::TTestColumn::ConvertToHash(ydbSchema); - schema.resize(0); + schema.clear(); for (auto& [name, typeInfo] : remap) { - schema.push_back({name, typeInfo}); + schema.push_back(NArrow::NTest::TTestColumn(name, typeInfo)); } ok = WriteData(runtime, sender, writeId++, tableId, MakeTestBlob({0, 100}, schema), schema); @@ -434,6 +434,7 @@ void TestWrite(const TestTableDescription& table) { void TestWriteOverload(const TestTableDescription& table) { TTestBasicRuntime runtime; TTester::Setup(runtime); + 
auto csDefaultControllerGuard = NKikimr::NYDBTest::TControllers::RegisterCSControllerGuard(); TActorId sender = runtime.AllocateEdgeActor(); CreateTestBootstrapper(runtime, CreateTestTabletInfo(TTestTxConfig::TxTablet0, TTabletTypes::ColumnShard), &CreateColumnShard); @@ -624,7 +625,7 @@ void TestWriteRead(bool reboots, const TestTableDescription& table = {}, TString runtime.DispatchEvents(options); auto write = [&](TTestBasicRuntime& runtime, TActorId& sender, ui64 writeId, ui64 tableId, - const TString& data, const std::vector>& ydbSchema, std::vector& intWriteIds) { + const TString& data, const std::vector& ydbSchema, std::vector& intWriteIds) { bool ok = WriteData(runtime, sender, writeId, tableId, data, ydbSchema, true, &intWriteIds); if (reboots) { RebootTablet(runtime, TTestTxConfig::TxTablet0, sender); @@ -654,8 +655,8 @@ void TestWriteRead(bool reboots, const TestTableDescription& table = {}, TString SetupSchema(runtime, sender, tableId, table, codec); - const std::vector>& ydbSchema = table.Schema; - const std::vector>& testYdbPk = table.Pk; + const std::vector& ydbSchema = table.Schema; + const std::vector& testYdbPk = table.Pk; // ----xx // -----xx.. 
@@ -879,8 +880,7 @@ void TestWriteRead(bool reboots, const TestTableDescription& table = {}, TString UNIT_ASSERT_LE(insertedBytes / 100000, 50); } if (committedBytes) { - UNIT_ASSERT_GE(committedBytes / 100000, 65); - UNIT_ASSERT_LE(committedBytes / 100000, 78); + UNIT_ASSERT_LE(committedBytes / 100000, 1); } if (compactedBytes) { if (codec == "" || codec == "lz4") { @@ -947,7 +947,7 @@ void TestCompactionInGranuleImpl(bool reboots, const TestTableDescription& table runtime.DispatchEvents(options); auto write = [&](TTestBasicRuntime& runtime, TActorId& sender, ui64 writeId, ui64 tableId, - const TString& data, const std::vector>& ydbSchema, std::vector& writeIds) { + const TString& data, const std::vector& ydbSchema, std::vector& writeIds) { bool ok = WriteData(runtime, sender, writeId, tableId, data, ydbSchema, true, &writeIds); if (reboots) { RebootTablet(runtime, TTestTxConfig::TxTablet0, sender); @@ -1038,7 +1038,7 @@ void TestCompactionInGranuleImpl(bool reboots, const TestTableDescription& table UNIT_ASSERT(rb); UNIT_ASSERT(reader.IsCorrectlyFinished()); - if (ydbPk[0].second == TTypeInfo(NTypeIds::String) || ydbPk[0].second == TTypeInfo(NTypeIds::Utf8)) { + if (ydbPk[0].GetType() == TTypeInfo(NTypeIds::String) || ydbPk[0].GetType() == TTypeInfo(NTypeIds::Utf8)) { UNIT_ASSERT(DataHas({rb}, triggerPortion, true)); UNIT_ASSERT(DataHas({rb}, smallWrites, true)); } else { @@ -1224,6 +1224,7 @@ void TestReadWithProgram(const TestTableDescription& table = {}) { TTestBasicRuntime runtime; TTester::Setup(runtime); + auto csDefaultControllerGuard = NKikimr::NYDBTest::TControllers::RegisterCSControllerGuard(); TActorId sender = runtime.AllocateEdgeActor(); CreateTestBootstrapper(runtime, CreateTestTabletInfo(TTestTxConfig::TxTablet0, TTabletTypes::ColumnShard), &CreateColumnShard); @@ -1307,6 +1308,7 @@ void TestReadWithProgram(const TestTableDescription& table = {}) void TestReadWithProgramLike(const TestTableDescription& table = {}) { TTestBasicRuntime runtime; 
TTester::Setup(runtime); + auto csDefaultControllerGuard = NKikimr::NYDBTest::TControllers::RegisterCSControllerGuard(); TActorId sender = runtime.AllocateEdgeActor(); CreateTestBootstrapper(runtime, @@ -1375,6 +1377,7 @@ void TestReadWithProgramLike(const TestTableDescription& table = {}) { void TestSomePrograms(const TestTableDescription& table) { TTestBasicRuntime runtime; TTester::Setup(runtime); + auto csDefaultControllerGuard = NKikimr::NYDBTest::TControllers::RegisterCSControllerGuard(); TActorId sender = runtime.AllocateEdgeActor(); CreateTestBootstrapper(runtime, CreateTestTabletInfo(TTestTxConfig::TxTablet0, TTabletTypes::ColumnShard), &CreateColumnShard); @@ -1430,12 +1433,13 @@ struct TReadAggregateResult { std::vector Counts = {100}; }; -void TestReadAggregate(const std::vector>& ydbSchema, const TString& testDataBlob, +void TestReadAggregate(const std::vector& ydbSchema, const TString& testDataBlob, bool addProjection, const std::vector& aggKeys = {}, const TReadAggregateResult& expectedResult = {}, const TReadAggregateResult& expectedFiltered = {1, {1}, {1}, {1}}) { TTestBasicRuntime runtime; TTester::Setup(runtime); + auto csDefaultControllerGuard = NKikimr::NYDBTest::TControllers::RegisterCSControllerGuard(); TActorId sender = runtime.AllocateEdgeActor(); CreateTestBootstrapper(runtime, CreateTestTabletInfo(TTestTxConfig::TxTablet0, TTabletTypes::ColumnShard), &CreateColumnShard); @@ -1449,8 +1453,7 @@ void TestReadAggregate(const std::vector>& ydbSche ui64 planStep = 100; ui64 txId = 100; - auto pk = ydbSchema; - pk.resize(4); + auto pk = NArrow::NTest::TTestColumn::CropSchema(ydbSchema, 4); TestTableDescription table{.Schema = ydbSchema, .Pk = pk}; SetupSchema(runtime, sender, tableId, table); @@ -1480,8 +1483,8 @@ void TestReadAggregate(const std::vector>& ydbSche ui32 prog = 0; for (ui32 i = 0; i < ydbSchema.size(); ++i, ++prog) { - if (intTypes.contains(ydbSchema[i].second.GetTypeId()) || - strTypes.contains(ydbSchema[i].second.GetTypeId())) { 
+ if (intTypes.contains(ydbSchema[i].GetType().GetTypeId()) || + strTypes.contains(ydbSchema[i].GetType().GetTypeId())) { checkResult.insert(prog); } @@ -1497,8 +1500,8 @@ void TestReadAggregate(const std::vector>& ydbSche for (ui32 i = 0; i < ydbSchema.size(); ++i, ++prog) { isFiltered.insert(prog); - if (intTypes.contains(ydbSchema[i].second.GetTypeId()) || - strTypes.contains(ydbSchema[i].second.GetTypeId())) { + if (intTypes.contains(ydbSchema[i].GetType().GetTypeId()) || + strTypes.contains(ydbSchema[i].GetType().GetTypeId())) { checkResult.insert(prog); } @@ -1516,8 +1519,8 @@ void TestReadAggregate(const std::vector>& ydbSche std::vector unnamedColumns = {"100", "101", "102", "103"}; if (!addProjection) { for (auto& key : aggKeys) { - namedColumns.push_back(ydbSchema[key].first); - unnamedColumns.push_back(ydbSchema[key].first); + namedColumns.push_back(ydbSchema[key].GetName()); + unnamedColumns.push_back(ydbSchema[key].GetName()); } } @@ -1563,13 +1566,14 @@ Y_UNIT_TEST_SUITE(EvWrite) { TTestBasicRuntime runtime; TTester::Setup(runtime); + auto csDefaultControllerGuard = NKikimr::NYDBTest::TControllers::RegisterCSControllerGuard(); const ui64 ownerId = 0; const ui64 tableId = 1; const ui64 schemaVersion = 1; - const std::vector> schema = { - {"key", TTypeInfo(NTypeIds::Uint64) }, - {"field", TTypeInfo(NTypeIds::Utf8) } + const std::vector schema = { + NArrow::NTest::TTestColumn("key", TTypeInfo(NTypeIds::Uint64)), + NArrow::NTest::TTestColumn("field", TTypeInfo(NTypeIds::Utf8)) }; const std::vector columnsIds = {1, 2}; PrepareTablet(runtime, tableId, schema); @@ -1583,8 +1587,9 @@ Y_UNIT_TEST_SUITE(EvWrite) { TString blobData = NArrow::SerializeBatchNoCompression(batch); UNIT_ASSERT(blobData.size() < TLimits::GetMaxBlobSize()); - auto evWrite = std::make_unique(txId, NKikimrDataEvents::TEvWrite::MODE_PREPARE); - ui64 payloadIndex = NEvWrite::TPayloadHelper(*evWrite).AddDataToPayload(std::move(blobData)); + auto evWrite = 
std::make_unique(NKikimrDataEvents::TEvWrite::MODE_PREPARE); + evWrite->SetTxId(txId); + ui64 payloadIndex = NEvWrite::TPayloadWriter(*evWrite).AddDataToPayload(std::move(blobData)); evWrite->AddOperation(NKikimrDataEvents::TEvWrite::TOperation::OPERATION_REPLACE, {ownerId, tableId, schemaVersion}, columnsIds, payloadIndex, NKikimrDataEvents::FORMAT_ARROW); TActorId sender = runtime.AllocateEdgeActor(); @@ -1613,14 +1618,16 @@ Y_UNIT_TEST_SUITE(EvWrite) { TTestBasicRuntime runtime; TTester::Setup(runtime); + auto csDefaultControllerGuard = NKikimr::NYDBTest::TControllers::RegisterCSControllerGuard(); + const ui64 ownerId = 0; const ui64 tableId = 1; const ui64 schemaVersion = 1; - const std::vector> schema = { - {"key", TTypeInfo(NTypeIds::Uint64) }, - {"field", TTypeInfo(NTypeIds::Utf8) } - }; + const std::vector schema = { + NArrow::NTest::TTestColumn("key", TTypeInfo(NTypeIds::Uint64)), + NArrow::NTest::TTestColumn("field", TTypeInfo(NTypeIds::Utf8)) + }; const std::vector columnsIds = {1, 2}; PrepareTablet(runtime, tableId, schema); const ui64 txId = 111; @@ -1633,8 +1640,9 @@ Y_UNIT_TEST_SUITE(EvWrite) { TString blobData = NArrow::SerializeBatchNoCompression(batch); UNIT_ASSERT(blobData.size() < TLimits::GetMaxBlobSize()); - auto evWrite = std::make_unique(txId, NKikimrDataEvents::TEvWrite::MODE_PREPARE); - ui64 payloadIndex = NEvWrite::TPayloadHelper(*evWrite).AddDataToPayload(std::move(blobData)); + auto evWrite = std::make_unique(NKikimrDataEvents::TEvWrite::MODE_PREPARE); + evWrite->SetTxId(txId); + ui64 payloadIndex = NEvWrite::TPayloadWriter(*evWrite).AddDataToPayload(std::move(blobData)); evWrite->AddOperation(NKikimrDataEvents::TEvWrite::TOperation::OPERATION_REPLACE, {ownerId, tableId, schemaVersion}, columnsIds, payloadIndex, NKikimrDataEvents::FORMAT_ARROW); TActorId sender = runtime.AllocateEdgeActor(); @@ -1660,14 +1668,16 @@ Y_UNIT_TEST_SUITE(EvWrite) { TTestBasicRuntime runtime; TTester::Setup(runtime); + auto csDefaultControllerGuard = 
NKikimr::NYDBTest::TControllers::RegisterCSControllerGuard(); + const ui64 ownerId = 0; const ui64 tableId = 1; const ui64 schemaVersion = 1; - const std::vector> schema = { - {"key", TTypeInfo(NTypeIds::Uint64) }, - {"field", TTypeInfo(NTypeIds::Utf8) } - }; + const std::vector schema = { + NArrow::NTest::TTestColumn("key", TTypeInfo(NTypeIds::Uint64)), + NArrow::NTest::TTestColumn("field", TTypeInfo(NTypeIds::Utf8)) + }; const std::vector columnsIds = {1, 2}; PrepareTablet(runtime, tableId, schema); const ui64 txId = 111; @@ -1680,8 +1690,9 @@ Y_UNIT_TEST_SUITE(EvWrite) { TString blobData = NArrow::SerializeBatchNoCompression(batch); UNIT_ASSERT(blobData.size() > TLimits::GetMaxBlobSize()); - auto evWrite = std::make_unique(txId, NKikimrDataEvents::TEvWrite::MODE_PREPARE); - ui64 payloadIndex = NEvWrite::TPayloadHelper(*evWrite).AddDataToPayload(std::move(blobData)); + auto evWrite = std::make_unique(NKikimrDataEvents::TEvWrite::MODE_PREPARE); + evWrite->SetTxId(txId); + ui64 payloadIndex = NEvWrite::TPayloadWriter(*evWrite).AddDataToPayload(std::move(blobData)); evWrite->AddOperation(NKikimrDataEvents::TEvWrite::TOperation::OPERATION_REPLACE, {ownerId, tableId, schemaVersion}, columnsIds, payloadIndex, NKikimrDataEvents::FORMAT_ARROW); TActorId sender = runtime.AllocateEdgeActor(); @@ -1698,6 +1709,105 @@ Y_UNIT_TEST_SUITE(EvWrite) { auto readResult = ReadAllAsBatch(runtime, tableId, NOlap::TSnapshot(11, txId), schema); UNIT_ASSERT_VALUES_EQUAL(readResult->num_rows(), 2048); } + + Y_UNIT_TEST(WriteWithLock) { + using namespace NArrow; + + TTestBasicRuntime runtime; + TTester::Setup(runtime); + auto csDefaultControllerGuard = NKikimr::NYDBTest::TControllers::RegisterCSControllerGuard(); + + + const ui64 ownerId = 0; + const ui64 tableId = 1; + const ui64 schemaVersion = 1; + const std::vector schema = { + NArrow::NTest::TTestColumn("key", TTypeInfo(NTypeIds::Uint64) ), + NArrow::NTest::TTestColumn("field", TTypeInfo(NTypeIds::Utf8) ) + }; + const std::vector 
columnsIds = {1, 2}; + PrepareTablet(runtime, tableId, schema); + const ui64 txId = 111; + const ui64 lockId = 110; + + { + NConstruction::IArrayBuilder::TPtr keyColumn = std::make_shared>>("key"); + NConstruction::IArrayBuilder::TPtr column = std::make_shared>("field", NConstruction::TStringPoolFiller(8, 100)); + auto batch = NConstruction::TRecordBatchConstructor({ keyColumn, column }).BuildBatch(2048); + TString blobData = NArrow::SerializeBatchNoCompression(batch); + UNIT_ASSERT(blobData.size() < TLimits::GetMaxBlobSize()); + auto evWrite = std::make_unique(NKikimrDataEvents::TEvWrite::MODE_PREPARE); + evWrite->SetLockId(lockId, 1); + + ui64 payloadIndex = NEvWrite::TPayloadWriter(*evWrite).AddDataToPayload(std::move(blobData)); + evWrite->AddOperation(NKikimrDataEvents::TEvWrite::TOperation::OPERATION_REPLACE, {ownerId, tableId, schemaVersion}, columnsIds, payloadIndex, NKikimrDataEvents::FORMAT_ARROW); + + TActorId sender = runtime.AllocateEdgeActor(); + ForwardToTablet(runtime, TTestTxConfig::TxTablet0, sender, evWrite.release()); + + { + TAutoPtr handle; + auto event = runtime.GrabEdgeEvent(handle); + UNIT_ASSERT(event); + UNIT_ASSERT_VALUES_EQUAL(event->Record.GetOrigin(), TTestTxConfig::TxTablet0); + UNIT_ASSERT_VALUES_EQUAL(event->Record.GetTxId(), lockId); + UNIT_ASSERT_VALUES_EQUAL((ui64)event->Record.GetStatus(), (ui64)NKikimrDataEvents::TEvWriteResult::STATUS_COMPLETED); + + auto readResult = ReadAllAsBatch(runtime, tableId, NOlap::TSnapshot(10, lockId), schema); + UNIT_ASSERT_VALUES_EQUAL(readResult->num_rows(), 0); + } + } + { + NConstruction::IArrayBuilder::TPtr keyColumn = std::make_shared>>("key", 2049); + NConstruction::IArrayBuilder::TPtr column = std::make_shared>("field", NConstruction::TStringPoolFiller(8, 100)); + auto batch = NConstruction::TRecordBatchConstructor({ keyColumn, column }).BuildBatch(2048); + TString blobData = NArrow::SerializeBatchNoCompression(batch); + UNIT_ASSERT(blobData.size() < TLimits::GetMaxBlobSize()); + auto 
evWrite = std::make_unique(NKikimrDataEvents::TEvWrite::MODE_PREPARE); + evWrite->SetLockId(lockId, 1); + + ui64 payloadIndex = NEvWrite::TPayloadWriter(*evWrite).AddDataToPayload(std::move(blobData)); + evWrite->AddOperation(NKikimrDataEvents::TEvWrite::TOperation::OPERATION_REPLACE, {ownerId, tableId, schemaVersion}, columnsIds, payloadIndex, NKikimrDataEvents::FORMAT_ARROW); + + TActorId sender = runtime.AllocateEdgeActor(); + ForwardToTablet(runtime, TTestTxConfig::TxTablet0, sender, evWrite.release()); + + { + TAutoPtr handle; + auto event = runtime.GrabEdgeEvent(handle); + UNIT_ASSERT(event); + UNIT_ASSERT_VALUES_EQUAL(event->Record.GetOrigin(), TTestTxConfig::TxTablet0); + UNIT_ASSERT_VALUES_EQUAL(event->Record.GetTxId(), lockId); + UNIT_ASSERT_VALUES_EQUAL((ui64)event->Record.GetStatus(), (ui64)NKikimrDataEvents::TEvWriteResult::STATUS_COMPLETED); + + auto readResult = ReadAllAsBatch(runtime, tableId, NOlap::TSnapshot(10, txId), schema); + UNIT_ASSERT_VALUES_EQUAL(readResult->num_rows(), 0); + } + } + { + auto evWrite = std::make_unique(NKikimrDataEvents::TEvWrite::MODE_PREPARE); + evWrite->SetLockId(lockId, 1); + evWrite->SetTxId(txId); + evWrite->Record.MutableLocks()->SetOp(NKikimrDataEvents::TKqpLocks::Commit); + + TActorId sender = runtime.AllocateEdgeActor(); + ForwardToTablet(runtime, TTestTxConfig::TxTablet0, sender, evWrite.release()); + + { + TAutoPtr handle; + auto event = runtime.GrabEdgeEvent(handle); + UNIT_ASSERT(event); + UNIT_ASSERT_VALUES_EQUAL(event->Record.GetOrigin(), TTestTxConfig::TxTablet0); + UNIT_ASSERT_VALUES_EQUAL((ui64)event->Record.GetStatus(), (ui64)NKikimrDataEvents::TEvWriteResult::STATUS_PREPARED); + UNIT_ASSERT_VALUES_EQUAL(event->Record.GetTxId(), txId); + } + + PlanWriteTx(runtime, sender, NOlap::TSnapshot(11, txId)); + } + + auto readResult = ReadAllAsBatch(runtime, tableId, NOlap::TSnapshot(11, txId), schema); + UNIT_ASSERT_VALUES_EQUAL(readResult->num_rows(), 2 * 2048); + } } 
Y_UNIT_TEST_SUITE(TColumnShardTestReadWrite) { @@ -1787,8 +1897,8 @@ Y_UNIT_TEST_SUITE(TColumnShardTestReadWrite) { auto schema = TTestSchema::YdbSchema(); auto pk = TTestSchema::YdbPkSchema(); - schema[0].second = TTypeInfo(typeId); - pk[0].second = TTypeInfo(typeId); + schema[0].SetType(TTypeInfo(typeId)); + pk[0].SetType(TTypeInfo(typeId)); TestTableDescription table{.Schema = schema, .Pk = pk}; TestCompactionInGranuleImpl(reboot, table); } @@ -1869,14 +1979,14 @@ Y_UNIT_TEST_SUITE(TColumnShardTestReadWrite) { Y_UNIT_TEST(ReadSomePrograms) { TestTableDescription table; table.Schema = { - {"timestamp", TTypeInfo(NTypeIds::Timestamp) }, - {"resource_id", TTypeInfo(NTypeIds::Utf8) }, - {"uid", TTypeInfo(NTypeIds::Utf8) }, - {"level", TTypeInfo(NTypeIds::Int32) }, - {"message", TTypeInfo(NTypeIds::Utf8) } + NArrow::NTest::TTestColumn("timestamp", TTypeInfo(NTypeIds::Timestamp) ), + NArrow::NTest::TTestColumn("resource_id", TTypeInfo(NTypeIds::Utf8) ), + NArrow::NTest::TTestColumn("uid", TTypeInfo(NTypeIds::Utf8) ), + NArrow::NTest::TTestColumn("level", TTypeInfo(NTypeIds::Int32) ), + NArrow::NTest::TTestColumn("message", TTypeInfo(NTypeIds::Utf8) ) }; table.Pk = { - {"timestamp", TTypeInfo(NTypeIds::Timestamp) } + NArrow::NTest::TTestColumn("timestamp", TTypeInfo(NTypeIds::Timestamp) ) }; TestSomePrograms(table); @@ -1913,7 +2023,7 @@ Y_UNIT_TEST_SUITE(TColumnShardTestReadWrite) { Cerr << "-- group by key: " << key << "\n"; // the type has the same values in test batch so result would be grouped in one row - if (sameValTypes.contains(schema[key].second.GetTypeId())) { + if (sameValTypes.contains(schema[key].GetType().GetTypeId())) { TestReadAggregate(schema, testBlob, (key % 2), { key }, resGrouped, resFiltered); } else { TestReadAggregate(schema, testBlob, (key % 2), { key }, resDefault, resFiltered); @@ -1921,8 +2031,8 @@ Y_UNIT_TEST_SUITE(TColumnShardTestReadWrite) { } for (ui32 key = 0; key < schema.size() - 1; ++key) { Cerr << "-- group by key: " << key << ", " 
<< key + 1 << "\n"; - if (sameValTypes.contains(schema[key].second.GetTypeId()) && - sameValTypes.contains(schema[key + 1].second.GetTypeId())) { + if (sameValTypes.contains(schema[key].GetType().GetTypeId()) && + sameValTypes.contains(schema[key + 1].GetType().GetTypeId())) { TestReadAggregate(schema, testBlob, (key % 2), { key, key + 1 }, resGrouped, resFiltered); } else { TestReadAggregate(schema, testBlob, (key % 2), { key, key + 1 }, resDefault, resFiltered); @@ -1930,9 +2040,9 @@ Y_UNIT_TEST_SUITE(TColumnShardTestReadWrite) { } for (ui32 key = 0; key < schema.size() - 2; ++key) { Cerr << "-- group by key: " << key << ", " << key + 1 << ", " << key + 2 << "\n"; - if (sameValTypes.contains(schema[key].second.GetTypeId()) && - sameValTypes.contains(schema[key + 1].second.GetTypeId()) && - sameValTypes.contains(schema[key + 1].second.GetTypeId())) { + if (sameValTypes.contains(schema[key].GetType().GetTypeId()) && + sameValTypes.contains(schema[key + 1].GetType().GetTypeId()) && + sameValTypes.contains(schema[key + 1].GetType().GetTypeId())) { TestReadAggregate(schema, testBlob, (key % 2), { key, key + 1, key + 2 }, resGrouped, resFiltered); } else { TestReadAggregate(schema, testBlob, (key % 2), { key, key + 1, key + 2 }, resDefault, resFiltered); @@ -1945,10 +2055,10 @@ Y_UNIT_TEST_SUITE(TColumnShardTestReadWrite) { TTestBasicRuntime& Runtime; const ui64 PlanStep; const ui64 TxId; - const std::vector> YdbPk; + const std::vector YdbPk; public: - TTabletReadPredicateTest(TTestBasicRuntime& runtime, const ui64 planStep, const ui64 txId, const std::vector>& ydbPk) + TTabletReadPredicateTest(TTestBasicRuntime& runtime, const ui64 planStep, const ui64 txId, const std::vector& ydbPk) : Runtime(runtime) , PlanStep(planStep) , TxId(txId) @@ -1968,14 +2078,14 @@ Y_UNIT_TEST_SUITE(TColumnShardTestReadWrite) { bool GetInclude() const noexcept { return Include; } - std::vector GetCellVec(const std::vector>& pk, + std::vector GetCellVec(const std::vector& pk, std::vector& 
mem, bool trailingNulls = false) const { UNIT_ASSERT(Border.size() <= pk.size()); std::vector cells; size_t i = 0; for (; i < Border.size(); ++i) { - cells.push_back(MakeTestCell(pk[i].second, Border[i], mem)); + cells.push_back(MakeTestCell(pk[i].GetType(), Border[i], mem)); } for (; trailingNulls && i < pk.size(); ++i) { cells.push_back(TCell()); @@ -1995,7 +2105,7 @@ Y_UNIT_TEST_SUITE(TColumnShardTestReadWrite) { TTestCaseOptions& SetTo(const TBorder& border) { To = border; return *this; } TTestCaseOptions& SetExpectedCount(ui32 count) { ExpectedCount = count; return *this; } - TSerializedTableRange MakeRange(const std::vector>& pk) const { + TSerializedTableRange MakeRange(const std::vector& pk) const { std::vector mem; auto cellsFrom = From ? From->GetCellVec(pk, mem, false) : std::vector(); auto cellsTo = To ? To->GetCellVec(pk, mem) : std::vector(); @@ -2073,7 +2183,7 @@ Y_UNIT_TEST_SUITE(TColumnShardTestReadWrite) { SetupSchema(runtime, sender, tableId, table, "lz4"); TAutoPtr handle; - bool isStrPk0 = table.Pk[0].second == TTypeInfo(NTypeIds::String) || table.Pk[0].second == TTypeInfo(NTypeIds::Utf8); + bool isStrPk0 = table.Pk[0].GetType() == TTypeInfo(NTypeIds::String) || table.Pk[0].GetType() == TTypeInfo(NTypeIds::Utf8); // Write different keys: grow on compaction @@ -2190,13 +2300,13 @@ Y_UNIT_TEST_SUITE(TColumnShardTestReadWrite) { ui32 resultLimit = 1024 * 1024; runtime.Send(new IEventHandle(scanActorId, sender, new NKqp::TEvKqpCompute::TEvScanDataAck(resultLimit, 0, 1))); auto scan = runtime.GrabEdgeEvent(handle); - auto batchStats = scan->ArrowBatch; if (scan->Finished) { AFL_VERIFY(!scan->ArrowBatch || !scan->ArrowBatch->num_rows()); break; } - UNIT_ASSERT(batchStats); -// Cerr << batchStats->ToString() << Endl; + UNIT_ASSERT(scan->ArrowBatch); + auto batchStats = NArrow::ToBatch(scan->ArrowBatch, true); + // Cerr << batchStats->ToString() << Endl; for (ui32 i = 0; i < batchStats->num_rows(); ++i) { auto paths = 
batchStats->GetColumnByName("PathId"); @@ -2222,7 +2332,7 @@ Y_UNIT_TEST_SUITE(TColumnShardTestReadWrite) { if (!keyColumnId) { keyColumnId = internalColumnId; } - Cerr << "[" << __LINE__ << "] " << activity << " " << table.Pk[0].second.GetTypeId() << " " + Cerr << "[" << __LINE__ << "] " << activity << " " << table.Pk[0].GetType().GetTypeId() << " " << pathId << " " << kindStr << " " << numRows << " " << numBytes << " " << numRawBytes << "\n"; if (pathId == tableId) { @@ -2260,12 +2370,12 @@ Y_UNIT_TEST_SUITE(TColumnShardTestReadWrite) { auto schema = TTestSchema::YdbSchema(); auto pk = TTestSchema::YdbPkSchema(); TTestBlobOptions opts; - opts.SameValueColumns.emplace(pk[0].first); + opts.SameValueColumns.emplace(pk[0].GetName()); - schema[0].second = TTypeInfo(typeId); - pk[0].second = TTypeInfo(typeId); - schema[1].second = TTypeInfo(typeId); - pk[1].second = TTypeInfo(typeId); + schema[0].SetType(TTypeInfo(typeId)); + pk[0].SetType(TTypeInfo(typeId)); + schema[1].SetType(TTypeInfo(typeId)); + pk[1].SetType(TTypeInfo(typeId)); TestTableDescription table{.Schema = schema, .Pk = pk}; TestCompactionSplitGranuleImpl(table, opts); } @@ -2305,6 +2415,7 @@ Y_UNIT_TEST_SUITE(TColumnShardTestReadWrite) { Y_UNIT_TEST(ReadStale) { TTestBasicRuntime runtime; TTester::Setup(runtime); + auto csDefaultControllerGuard = NKikimr::NYDBTest::TControllers::RegisterCSControllerGuard(); TActorId sender = runtime.AllocateEdgeActor(); CreateTestBootstrapper(runtime, CreateTestTabletInfo(TTestTxConfig::TxTablet0, TTabletTypes::ColumnShard), &CreateColumnShard); @@ -2322,7 +2433,7 @@ Y_UNIT_TEST_SUITE(TColumnShardTestReadWrite) { SetupSchema(runtime, sender, tableId); TAutoPtr handle; - // Write some test data to adavnce the time + // Write some test data to advance the time { std::pair triggerPortion = {1, 1000}; TString triggerData = MakeTestBlob(triggerPortion, ydbSchema); @@ -2434,7 +2545,7 @@ Y_UNIT_TEST_SUITE(TColumnShardTestReadWrite) { } Cerr << Endl; } - if (auto cleanup = 
dynamic_pointer_cast(msg->IndexChanges)) { + if (auto cleanup = dynamic_pointer_cast(msg->IndexChanges)) { Y_ABORT_UNLESS(cleanup->PortionsToDrop.size()); ++cleanupsHappened; Cerr << "Cleanup old portions:"; @@ -2477,7 +2588,7 @@ Y_UNIT_TEST_SUITE(TColumnShardTestReadWrite) { Cerr << " deletes blobs: " << JoinStrings(msg->DoNotKeep->begin(), msg->DoNotKeep->end(), " "); for (const auto& blobId : *msg->DoNotKeep) { deletedBlobs.insert(blobId.ToString()); - delayedBlobs.erase(TUnifiedBlobId(0, blobId).ToStringNew()); + delayedBlobs.erase(NOlap::TUnifiedBlobId(0, blobId).ToStringNew()); } } Cerr << Endl; @@ -2613,6 +2724,18 @@ Y_UNIT_TEST_SUITE(TColumnShardTestReadWrite) { Y_UNIT_TEST(CompactionGC) { TestCompactionGC(); } + + Y_UNIT_TEST(PortionInfoSize) { + Cerr << sizeof(NOlap::TPortionInfo) << Endl; + Cerr << sizeof(NOlap::TPortionMeta) << Endl; + Cerr << sizeof(NOlap::TColumnRecord) << Endl; + Cerr << sizeof(NOlap::TIndexChunk) << Endl; + Cerr << sizeof(std::optional) << Endl; + Cerr << sizeof(std::optional) << Endl; + Cerr << sizeof(NOlap::TSnapshot) << Endl; + Cerr << sizeof(NArrow::TReplaceKey) << Endl; + Cerr << sizeof(NArrow::NMerger::TSortableBatchPosition) << Endl; + } } } diff --git a/ydb/core/tx/columnshard/ut_rw/ut_normalizer.cpp b/ydb/core/tx/columnshard/ut_rw/ut_normalizer.cpp index 3c467583b189..8f6df202a4ec 100644 --- a/ydb/core/tx/columnshard/ut_rw/ut_normalizer.cpp +++ b/ydb/core/tx/columnshard/ut_rw/ut_normalizer.cpp @@ -32,6 +32,16 @@ struct TPortionRecord { ui32 Size = 0; }; + +class TNormalizerChecker { +public: + virtual ~TNormalizerChecker() {} + + virtual ui64 RecordsCountAfterReboot(const ui64 initialRecodsCount) const { + return initialRecodsCount; + } +}; + class TPathIdCleaner : public NYDBTest::ILocalDBModifier { public: virtual void Apply(NTabletFlatExecutor::TTransactionContext& txc) const override { @@ -149,59 +159,78 @@ class TColumnChunksCleaner : public NYDBTest::ILocalDBModifier { } } }; - -class TMinMaxCleaner : public 
NYDBTest::ILocalDBModifier { +/* +class TPortinosCleaner : public NYDBTest::ILocalDBModifier { public: virtual void Apply(NTabletFlatExecutor::TTransactionContext& txc) const override { using namespace NColumnShard; NIceDb::TNiceDb db(txc.DB); - std::vector portion2Key; - std::optional pathId; + std::vector portions; { - auto rowset = db.Table().Select(); + auto rowset = db.Table().Select(); UNIT_ASSERT(rowset.IsReady()); while (!rowset.EndOfSet()) { - TPortionRecord key; - key.Index = rowset.GetValue(); - key.Granule = rowset.GetValue(); - key.ColumnIdx = rowset.GetValue(); - key.PlanStep = rowset.GetValue(); - key.TxId = rowset.GetValue(); - key.Portion = rowset.GetValue(); - key.Chunk = rowset.GetValue(); - - key.XPlanStep = rowset.GetValue(); - key.XTxId = rowset.GetValue(); - key.Blob = rowset.GetValue(); - key.Metadata = rowset.GetValue(); - key.Offset = rowset.GetValue(); - key.Size = rowset.GetValue(); + NOlap::TPortionAddress addr(rowset.GetValue(), rowset.GetValue()); + portions.emplace_back(addr); + UNIT_ASSERT(rowset.Next()); + } + } - pathId = rowset.GetValue(); + for (auto&& key: portions) { + db.Table().Key(key.GetPathId(), key.GetPortionId()).Delete(); + } + } +}; +*/ +class TTablesCleaner : public NYDBTest::ILocalDBModifier { +public: + virtual void Apply(NTabletFlatExecutor::TTransactionContext& txc) const override { + using namespace NColumnShard; + NIceDb::TNiceDb db(txc.DB); - portion2Key.emplace_back(std::move(key)); + std::vector tables; + { + auto rowset = db.Table().Select(); + UNIT_ASSERT(rowset.IsReady()); + while (!rowset.EndOfSet()) { + const auto pathId = rowset.GetValue(); + tables.emplace_back(pathId); UNIT_ASSERT(rowset.Next()); } } - UNIT_ASSERT(pathId.has_value()); + for (auto&& key: tables) { + db.Table().Key(key).Delete(); + } - for (auto&& key: portion2Key) { - NKikimrTxColumnShard::TIndexColumnMeta metaProto; - UNIT_ASSERT(metaProto.ParseFromArray(key.Metadata.data(), key.Metadata.size())); - if (metaProto.HasPortionMeta()) { 
- metaProto.MutablePortionMeta()->ClearRecordSnapshotMax(); - metaProto.MutablePortionMeta()->ClearRecordSnapshotMin(); + struct TKey { + ui64 PathId; + ui64 Step; + ui64 TxId; + }; + + std::vector versions; + { + auto rowset = db.Table().Select(); + UNIT_ASSERT(rowset.IsReady()); + + while (!rowset.EndOfSet()) { + TKey key; + key.PathId = rowset.GetValue(); + key.Step = rowset.GetValue(); + key.TxId = rowset.GetValue(); + versions.emplace_back(key); + UNIT_ASSERT(rowset.Next()); } + } - db.Table().Key(key.Index, key.Granule, key.ColumnIdx, - key.PlanStep, key.TxId, key.Portion, key.Chunk).Update( - NIceDb::TUpdate(metaProto.SerializeAsString()) - ); + for (auto&& key: versions) { + db.Table().Key(key.PathId, key.Step, key.TxId).Delete(); } + } }; @@ -218,7 +247,7 @@ class TPrepareLocalDBController: public NKikimr::NYDBTest::NColumnShard::TContro Y_UNIT_TEST_SUITE(Normalizers) { template - void TestNormalizerImpl() { + void TestNormalizerImpl(const TNormalizerChecker& checker = TNormalizerChecker()) { using namespace NArrow; auto csControllerGuard = NYDBTest::TControllers::RegisterCSControllerGuard>(); @@ -228,10 +257,10 @@ Y_UNIT_TEST_SUITE(Normalizers) { const ui64 ownerId = 0; const ui64 tableId = 1; const ui64 schemaVersion = 1; - const std::vector> schema = { - {"key1", TTypeInfo(NTypeIds::Uint64) }, - {"key2", TTypeInfo(NTypeIds::Uint64) }, - {"field", TTypeInfo(NTypeIds::Utf8) } + const std::vector schema = { + NArrow::NTest::TTestColumn("key1", TTypeInfo(NTypeIds::Uint64)), + NArrow::NTest::TTestColumn("key2", TTypeInfo(NTypeIds::Uint64)), + NArrow::NTest::TTestColumn("field", TTypeInfo(NTypeIds::Utf8) ) }; const std::vector columnsIds = { 1, 2, 3}; PrepareTablet(runtime, tableId, schema, 2); @@ -245,8 +274,9 @@ Y_UNIT_TEST_SUITE(Normalizers) { auto batch = NConstruction::TRecordBatchConstructor({ key1Column, key2Column, column }).BuildBatch(20048); TString blobData = NArrow::SerializeBatchNoCompression(batch); - auto evWrite = std::make_unique(txId, 
NKikimrDataEvents::TEvWrite::MODE_PREPARE); - ui64 payloadIndex = NEvWrite::TPayloadHelper(*evWrite).AddDataToPayload(std::move(blobData)); + auto evWrite = std::make_unique(NKikimrDataEvents::TEvWrite::MODE_PREPARE); + evWrite->SetTxId(txId); + ui64 payloadIndex = NEvWrite::TPayloadWriter(*evWrite).AddDataToPayload(std::move(blobData)); evWrite->AddOperation(NKikimrDataEvents::TEvWrite::TOperation::OPERATION_REPLACE, {ownerId, tableId, schemaVersion}, columnsIds, payloadIndex, NKikimrDataEvents::FORMAT_ARROW); TActorId sender = runtime.AllocateEdgeActor(); @@ -268,7 +298,7 @@ Y_UNIT_TEST_SUITE(Normalizers) { { auto readResult = ReadAllAsBatch(runtime, tableId, NOlap::TSnapshot(11, txId), schema); - UNIT_ASSERT_VALUES_EQUAL(readResult->num_rows(), 20048); + UNIT_ASSERT_VALUES_EQUAL(readResult->num_rows(), checker.RecordsCountAfterReboot(20048)); } } @@ -279,9 +309,21 @@ Y_UNIT_TEST_SUITE(Normalizers) { Y_UNIT_TEST(ColumnChunkNormalizer) { TestNormalizerImpl(); } +/* + Y_UNIT_TEST(PortionsNormalizer) { + TestNormalizerImpl(); + } +*/ - Y_UNIT_TEST(MinMaxNormalizer) { - TestNormalizerImpl(); + Y_UNIT_TEST(EmptyTablesNormalizer) { + class TLocalNormalizerChecker : public TNormalizerChecker { + public: + ui64 RecordsCountAfterReboot(const ui64) const override { + return 0; + } + }; + TLocalNormalizerChecker checker; + TestNormalizerImpl(checker); } } diff --git a/ydb/core/tx/columnshard/ut_rw/ya.make b/ydb/core/tx/columnshard/ut_rw/ya.make index 5932bccf759a..8f5af7869b5f 100644 --- a/ydb/core/tx/columnshard/ut_rw/ya.make +++ b/ydb/core/tx/columnshard/ut_rw/ya.make @@ -19,9 +19,11 @@ PEERDIR( library/cpp/regex/pcre library/cpp/svnversion ydb/core/testlib/default + ydb/core/tx/columnshard/test_helper ydb/core/tx/columnshard/hooks/abstract ydb/core/tx/columnshard/hooks/testing ydb/core/tx/columnshard/common/tests + ydb/core/tx/columnshard/test_helper ydb/services/metadata ydb/core/tx ydb/public/lib/yson_value @@ -33,6 +35,7 @@ SRCS( columnshard_ut_common.cpp 
ut_columnshard_read_write.cpp ut_normalizer.cpp + ut_backup.cpp ) END() diff --git a/ydb/core/tx/columnshard/ut_schema/ut_columnshard_schema.cpp b/ydb/core/tx/columnshard/ut_schema/ut_columnshard_schema.cpp index d08e22258981..ae9da9114c76 100644 --- a/ydb/core/tx/columnshard/ut_schema/ut_columnshard_schema.cpp +++ b/ydb/core/tx/columnshard/ut_schema/ut_columnshard_schema.cpp @@ -9,6 +9,8 @@ #include #include #include +#include +#include #include #include @@ -20,6 +22,7 @@ namespace NKikimr { using namespace NTxUT; using namespace NColumnShard; +using TDefaultTestsController = NKikimr::NYDBTest::NColumnShard::TController; enum class EInitialEviction { None, @@ -27,71 +30,10 @@ enum class EInitialEviction { Tiering }; -class TWaitCompactionController: public NKikimr::NYDBTest::NColumnShard::TController { -private: - using TBase = NKikimr::NYDBTest::ICSController; - TAtomic TTLFinishedCounter = 0; - TAtomic TTLStartedCounter = 0; - NMetadata::NFetcher::ISnapshot::TPtr CurrentConfig; - bool CompactionEnabledFlag = true; - ui32 TiersModificationsCount = 0; -protected: - virtual void OnTieringModified(const std::shared_ptr& /*tiers*/) override { - ++TiersModificationsCount; - AFL_INFO(NKikimrServices::TX_COLUMNSHARD)("event", "OnTieringModified")("count", TiersModificationsCount); - } - virtual bool DoOnStartCompaction(std::shared_ptr& changes) override { - if (!CompactionEnabledFlag) { - changes = nullptr; - } - return true; - } - virtual bool DoOnWriteIndexComplete(const ui64 /*tabletId*/, const TString& changeClassName) override { - if (changeClassName.find("TTL") != TString::npos) { - AtomicIncrement(TTLFinishedCounter); - } - return true; - } - virtual bool DoOnWriteIndexStart(const ui64 /*tabletId*/, const TString& changeClassName) override { - if (changeClassName.find("TTL") != TString::npos) { - AtomicIncrement(TTLStartedCounter); - } - return true; - } -public: - void SetCompactionEnabled(const bool value) { - CompactionEnabledFlag = value; - } - void 
SetTiersSnapshot(TTestBasicRuntime& runtime, const TActorId& tabletActorId, const NMetadata::NFetcher::ISnapshot::TPtr& snapshot) { - CurrentConfig = snapshot; - ui32 startCount = TiersModificationsCount; - ProvideTieringSnapshot(runtime, tabletActorId, snapshot); - while (TiersModificationsCount == startCount) { - runtime.SimulateSleep(TDuration::Seconds(1)); - } - } - - virtual NMetadata::NFetcher::ISnapshot::TPtr GetFallbackTiersSnapshot() const override { - if (CurrentConfig) { - return CurrentConfig; - } else { - return TBase::GetFallbackTiersSnapshot(); - } - } - i64 GetTTLFinishedCounter() const { - return AtomicGet(TTLFinishedCounter); - } - - i64 GetTTLStartedCounter() const { - return AtomicGet(TTLStartedCounter); - } - -}; - namespace { -static const std::vector> testYdbSchema = TTestSchema::YdbSchema(); -static const std::vector> testYdbPk = TTestSchema::YdbPkSchema(); +static const std::vector testYdbSchema = TTestSchema::YdbSchema(); +static const std::vector testYdbPk = TTestSchema::YdbPkSchema(); std::shared_ptr UpdateColumn(std::shared_ptr batch, TString columnName, i64 seconds) { std::string name(columnName.c_str(), columnName.size()); @@ -184,7 +126,7 @@ bool CheckSame(const std::shared_ptr& batch, const ui32 expe } std::vector MakeData(const std::vector& ts, ui32 portionSize, ui32 overlapSize, const TString& ttlColumnName, - const std::vector>& ydbSchema = testYdbSchema) { + const std::vector& ydbSchema = testYdbSchema) { UNIT_ASSERT(ts.size() > 0); ui32 numRows = portionSize + (ts.size() - 1) * (portionSize - overlapSize); @@ -204,23 +146,6 @@ std::vector MakeData(const std::vector& ts, ui32 portionSize, ui3 return data; } -bool TestCreateTable(const TString& txBody, ui64 planStep = 1000, ui64 txId = 100) { - TTestBasicRuntime runtime; - TTester::Setup(runtime); - - TActorId sender = runtime.AllocateEdgeActor(); - CreateTestBootstrapper(runtime, - CreateTestTabletInfo(TTestTxConfig::TxTablet0, TTabletTypes::ColumnShard), - &CreateColumnShard); 
- - TDispatchOptions options; - options.FinalEvents.push_back(TDispatchOptions::TFinalEventCondition(TEvTablet::EvBoot)); - runtime.DispatchEvents(options); - - // - return ProposeSchemaTx(runtime, sender, txBody, NOlap::TSnapshot(++planStep, ++txId)); -} - enum class EExpectedResult { OK_FINISHED, OK, @@ -232,16 +157,16 @@ static constexpr ui32 PORTION_ROWS = 80 * 1000; // ts[0] = 1600000000; // date -u --date='@1600000000' Sun Sep 13 12:26:40 UTC 2020 // ts[1] = 1620000000; // date -u --date='@1620000000' Mon May 3 00:00:00 UTC 2021 void TestTtl(bool reboots, bool internal, TTestSchema::TTableSpecials spec = {}, - const std::vector>& ydbSchema = testYdbSchema) + const std::vector& ydbSchema = testYdbSchema) { - auto csControllerGuard = NKikimr::NYDBTest::TControllers::RegisterCSControllerGuard(); - csControllerGuard->SetCompactionEnabled(false); + auto csControllerGuard = NKikimr::NYDBTest::TControllers::RegisterCSControllerGuard(); + csControllerGuard->DisableBackground(NKikimr::NYDBTest::ICSController::EBackground::Compaction); std::vector ts = {1600000000, 1620000000}; ui32 ttlIncSeconds = 1; - for (auto& [name, typeInfo] : ydbSchema) { - if (name == spec.TtlColumn) { - if (typeInfo.GetTypeId() == NTypeIds::Date) { + for (auto& c : ydbSchema) { + if (c.GetName() == spec.TtlColumn) { + if (c.GetType().GetTypeId() == NTypeIds::Date) { ttlIncSeconds = TDuration::Days(1).Seconds(); } break; @@ -280,11 +205,9 @@ void TestTtl(bool reboots, bool internal, TTestSchema::TTableSpecials spec = {}, UNIT_ASSERT(!spec.TtlColumn.empty()); spec.EvictAfter = TDuration::Seconds(ttlSec); } - bool ok = ProposeSchemaTx(runtime, sender, + SetupSchema(runtime, sender, TTestSchema::CreateInitShardTxBody(tableId, ydbSchema, testYdbPk, spec, "/Root/olapStore"), NOlap::TSnapshot(++planStep, ++txId)); - UNIT_ASSERT(ok); - PlanSchemaTx(runtime, sender, NOlap::TSnapshot(planStep, txId)); if (spec.HasTiers()) { csControllerGuard->SetTiersSnapshot(runtime, sender, 
TTestSchema::BuildSnapshot(spec)); } @@ -310,6 +233,9 @@ void TestTtl(bool reboots, bool internal, TTestSchema::TTableSpecials spec = {}, } else { TriggerTTL(runtime, sender, NOlap::TSnapshot(++planStep, ++txId), {tableId}, ts[0] + ttlIncSeconds, spec.TtlColumn); } + while (csControllerGuard->GetTTLFinishedCounter().Val() != csControllerGuard->GetTTLStartedCounter().Val()) { + runtime.SimulateSleep(TDuration::Seconds(1)); // wait all finished before (ttl especially) + } TAutoPtr handle; @@ -333,11 +259,9 @@ void TestTtl(bool reboots, bool internal, TTestSchema::TTableSpecials spec = {}, } else { spec.EvictAfter = TDuration::Seconds(ttlSec); } - ok = ProposeSchemaTx(runtime, sender, + SetupSchema(runtime, sender, TTestSchema::AlterTableTxBody(tableId, 2, spec), NOlap::TSnapshot(++planStep, ++txId)); - UNIT_ASSERT(ok); - PlanSchemaTx(runtime, sender, NOlap::TSnapshot(planStep, txId)); if (spec.HasTiers()) { csControllerGuard->SetTiersSnapshot(runtime, sender, TTestSchema::BuildSnapshot(spec)); } @@ -347,18 +271,21 @@ void TestTtl(bool reboots, bool internal, TTestSchema::TTableSpecials spec = {}, } else { TriggerTTL(runtime, sender, NOlap::TSnapshot(++planStep, ++txId), {tableId}, ts[1] + ttlIncSeconds, spec.TtlColumn); } + while (csControllerGuard->GetTTLFinishedCounter().Val() != csControllerGuard->GetTTLStartedCounter().Val()) { + runtime.SimulateSleep(TDuration::Seconds(1)); // wait all finished before (ttl especially) + } { --planStep; NOlap::NTests::TShardReader reader(runtime, TTestTxConfig::TxTablet0, tableId, NOlap::TSnapshot(planStep, Max())); - reader.SetReplyColumns({spec.TtlColumn}); + reader.SetReplyColumns({spec.TtlColumn, NOlap::TIndexInfo::SPEC_COL_PLAN_STEP}); auto rb = reader.ReadAll(); UNIT_ASSERT(reader.IsCorrectlyFinished()); UNIT_ASSERT(!rb || !rb->num_rows()); } // Disable TTL - ok = ProposeSchemaTx(runtime, sender, + auto ok = ProposeSchemaTx(runtime, sender, TTestSchema::AlterTableTxBody(tableId, 3, TTestSchema::TTableSpecials()), 
NOlap::TSnapshot(++planStep, ++txId)); UNIT_ASSERT(ok); @@ -378,6 +305,9 @@ void TestTtl(bool reboots, bool internal, TTestSchema::TTableSpecials spec = {}, } else { TriggerTTL(runtime, sender, NOlap::TSnapshot(++planStep, ++txId), {tableId}, ts[0] - ttlIncSeconds, spec.TtlColumn); } + while (csControllerGuard->GetTTLFinishedCounter().Val() != csControllerGuard->GetTTLStartedCounter().Val()) { + runtime.SimulateSleep(TDuration::Seconds(1)); // wait all finished before (ttl especially) + } { --planStep; @@ -387,6 +317,14 @@ void TestTtl(bool reboots, bool internal, TTestSchema::TTableSpecials spec = {}, UNIT_ASSERT(reader.IsCorrectlyFinished()); UNIT_ASSERT(CheckSame(rb, PORTION_ROWS, spec.TtlColumn, ts[0])); } + + if (spec.NeedTestStatistics()) { + AFL_VERIFY(csControllerGuard->GetStatisticsUsageCount().Val()); + AFL_VERIFY(!csControllerGuard->GetMaxValueUsageCount().Val()); + } else { + AFL_VERIFY(!csControllerGuard->GetStatisticsUsageCount().Val()); + AFL_VERIFY(csControllerGuard->GetMaxValueUsageCount().Val()); + } } class TCountersContainer { @@ -568,8 +506,12 @@ std::vector> TestTiers(bool reboots, const std::vector(); + auto csControllerGuard = NKikimr::NYDBTest::TControllers::RegisterCSControllerGuard(); + csControllerGuard->DisableBackground(NYDBTest::ICSController::EBackground::TTL); TTestBasicRuntime runtime; TTester::Setup(runtime); @@ -606,13 +548,9 @@ std::vector> TestTiers(bool reboots, const std::vector 0); - { - const bool ok = ProposeSchemaTx(runtime, sender, + SetupSchema(runtime, sender, TTestSchema::CreateInitShardTxBody(tableId, testYdbSchema, testYdbPk, specs[0], "/Root/olapStore"), NOlap::TSnapshot(++planStep, ++txId)); - UNIT_ASSERT(ok); - } - PlanSchemaTx(runtime, sender, NOlap::TSnapshot(planStep, txId)); if (specs[0].Tiers.size()) { csControllerGuard->SetTiersSnapshot(runtime, sender, TTestSchema::BuildSnapshot(specs[0])); } @@ -627,6 +565,7 @@ std::vector> TestTiers(bool reboots, const 
std::vectorEnableBackground(NYDBTest::ICSController::EBackground::TTL); runtime.SetLogPriority(NKikimrServices::TX_COLUMNSHARD, NActors::NLog::PRI_DEBUG); @@ -661,11 +600,9 @@ std::vector> TestTiers(bool reboots, const std::vectorSetTiersSnapshot(runtime, sender, TTestSchema::BuildSnapshot(specs[i])); @@ -693,7 +630,6 @@ std::vector> TestTiers(bool reboots, const std::vector> TestTiers(bool reboots, const std::vectorIsCorrectlyFinished()); } } - while (csControllerGuard->GetTTLFinishedCounter() != csControllerGuard->GetTTLStartedCounter()) { + while (csControllerGuard->GetTTLFinishedCounter().Val() != csControllerGuard->GetTTLStartedCounter().Val()) { runtime.SimulateSleep(TDuration::Seconds(1)); // wait all finished before (ttl especially) } @@ -748,6 +684,14 @@ std::vector> TestTiers(bool reboots, const std::vectorGetStatisticsUsageCount().Val()); + AFL_VERIFY(!csControllerGuard->GetMaxValueUsageCount().Val()); + } else { + AFL_VERIFY(!csControllerGuard->GetStatisticsUsageCount().Val()); + AFL_VERIFY(csControllerGuard->GetMaxValueUsageCount().Val()); + } + return specRowsBytes; } @@ -831,8 +775,8 @@ class TEvictionChanges { TTestSchema::TTableSpecials InitialSpec(const EInitialEviction init, TDuration initTs) { TTestSchema::TTableSpecials spec; + spec.TtlColumn = "timestamp"; if (init == EInitialEviction::Ttl) { - spec.TtlColumn = "timestamp"; spec.EvictAfter = initTs; } return spec; @@ -894,10 +838,11 @@ std::vector> TestOneTierExport(const TTestSchema::TTableSp return rowsBytes; } -void TestTwoHotTiers(bool reboot, bool changeTtl, const EInitialEviction initial = EInitialEviction::None, +void TestTwoHotTiers(bool reboot, bool changeTtl, const bool statisticsUsage, const EInitialEviction initial = EInitialEviction::None, bool revCompaction = false) { TTestSchema::TTableSpecials spec; spec.SetTtlColumn("timestamp"); + spec.SetNeedTestStatistics(statisticsUsage); spec.Tiers.emplace_back(TTestSchema::TStorageTier("tier0").SetTtlColumn("timestamp")); 
spec.Tiers.emplace_back(TTestSchema::TStorageTier("tier1").SetTtlColumn("timestamp")); spec.Tiers[(revCompaction ? 0 : 1)].SetCodec("zstd"); @@ -930,13 +875,13 @@ void TestTwoHotTiers(bool reboot, bool changeTtl, const EInitialEviction initial } } -void TestHotAndColdTiers(bool reboot, const EInitialEviction initial) { +void TestHotAndColdTiers(bool reboot, const EInitialEviction initial, const bool statisticsUsage) { TTestSchema::TTableSpecials spec; spec.SetTtlColumn("timestamp"); spec.Tiers.emplace_back(TTestSchema::TStorageTier("tier0").SetTtlColumn("timestamp")); spec.Tiers.emplace_back(TTestSchema::TStorageTier("tier1").SetTtlColumn("timestamp")); spec.Tiers.back().S3 = TTestSchema::TStorageTier::FakeS3(); - + spec.SetNeedTestStatistics(statisticsUsage); TestTiersAndTtl(spec, reboot, initial); } @@ -979,6 +924,7 @@ void TestExport(bool reboot, TExportTestOpts&& opts = TExportTestOpts{}) { void TestDrop(bool reboots) { TTestBasicRuntime runtime; TTester::Setup(runtime); + auto csDefaultControllerGuard = NKikimr::NYDBTest::TControllers::RegisterCSControllerGuard(); TActorId sender = runtime.AllocateEdgeActor(); CreateTestBootstrapper(runtime, @@ -996,11 +942,8 @@ void TestDrop(bool reboots) { ui64 planStep = 1000000000; // greater then delays ui64 txId = 100; - bool ok = ProposeSchemaTx(runtime, sender, TTestSchema::CreateTableTxBody(tableId, testYdbSchema, testYdbPk), + SetupSchema(runtime, sender, TTestSchema::CreateTableTxBody(tableId, testYdbSchema, testYdbPk), NOlap::TSnapshot(++planStep, ++txId)); - UNIT_ASSERT(ok); - PlanSchemaTx(runtime, sender, NOlap::TSnapshot(planStep, txId)); - // TString data1 = MakeTestBlob({0, PORTION_ROWS}, testYdbSchema); @@ -1027,9 +970,7 @@ void TestDrop(bool reboots) { } // Drop table - ok = ProposeSchemaTx(runtime, sender, TTestSchema::DropTableTxBody(tableId, 2), NOlap::TSnapshot(++planStep, ++txId)); - UNIT_ASSERT(ok); - PlanSchemaTx(runtime, sender, NOlap::TSnapshot(planStep, txId)); + SetupSchema(runtime, sender, 
TTestSchema::DropTableTxBody(tableId, 2), NOlap::TSnapshot(++planStep, ++txId)); if (reboots) { RebootTablet(runtime, TTestTxConfig::TxTablet0, sender); @@ -1049,6 +990,7 @@ void TestDrop(bool reboots) { void TestDropWriteRace() { TTestBasicRuntime runtime; TTester::Setup(runtime); + auto csDefaultControllerGuard = NKikimr::NYDBTest::TControllers::RegisterCSControllerGuard(); TActorId sender = runtime.AllocateEdgeActor(); CreateTestBootstrapper(runtime, @@ -1068,11 +1010,8 @@ void TestDropWriteRace() { NLongTxService::TLongTxId longTxId; UNIT_ASSERT(longTxId.ParseString("ydb://long-tx/01ezvvxjdk2hd4vdgjs68knvp8?node_id=1")); - bool ok = ProposeSchemaTx(runtime, sender, TTestSchema::CreateTableTxBody(tableId, testYdbSchema, testYdbPk), + SetupSchema(runtime, sender, TTestSchema::CreateTableTxBody(tableId, testYdbSchema, testYdbPk), NOlap::TSnapshot(++planStep, ++txId)); - UNIT_ASSERT(ok); - PlanSchemaTx(runtime, sender, NOlap::TSnapshot(planStep, txId)); - TString data = MakeTestBlob({0, 100}, testYdbSchema); UNIT_ASSERT(data.size() < NColumnShard::TLimits::MIN_BYTES_TO_INSERT); @@ -1083,7 +1022,7 @@ void TestDropWriteRace() { auto commitTxId = txId; // Drop table - ok = ProposeSchemaTx(runtime, sender, TTestSchema::DropTableTxBody(tableId, 2), NOlap::TSnapshot(++planStep, ++txId)); + auto ok = ProposeSchemaTx(runtime, sender, TTestSchema::DropTableTxBody(tableId, 2), NOlap::TSnapshot(++planStep, ++txId)); if (ok) { PlanSchemaTx(runtime, sender, NOlap::TSnapshot(planStep, txId)); } @@ -1095,6 +1034,7 @@ void TestDropWriteRace() { void TestCompaction(std::optional numWrites = {}) { TTestBasicRuntime runtime; TTester::Setup(runtime); + auto csDefaultControllerGuard = NKikimr::NYDBTest::TControllers::RegisterCSControllerGuard(); TActorId sender = runtime.AllocateEdgeActor(); CreateTestBootstrapper(runtime, @@ -1111,11 +1051,8 @@ void TestCompaction(std::optional numWrites = {}) { ui64 planStep = 100; ui64 txId = 100; - bool ok = ProposeSchemaTx(runtime, sender, 
TTestSchema::CreateTableTxBody(tableId, testYdbSchema, testYdbPk), + SetupSchema(runtime, sender, TTestSchema::CreateTableTxBody(tableId, testYdbSchema, testYdbPk), NOlap::TSnapshot(++planStep, ++txId)); - UNIT_ASSERT(ok); - PlanSchemaTx(runtime, sender, NOlap::TSnapshot(planStep, txId)); - // Set tiering ui64 ts = 1620000000; @@ -1131,11 +1068,8 @@ void TestCompaction(std::optional numWrites = {}) { spec.Tiers.back().EvictAfter = allow; spec.Tiers.back().S3 = TTestSchema::TStorageTier::FakeS3(); - ok = ProposeSchemaTx(runtime, sender, TTestSchema::AlterTableTxBody(tableId, 1, spec), + SetupSchema(runtime, sender, TTestSchema::AlterTableTxBody(tableId, 1, spec), NOlap::TSnapshot(++planStep, ++txId)); - UNIT_ASSERT(ok); - PlanSchemaTx(runtime, sender, NOlap::TSnapshot(planStep, txId)); - ProvideTieringSnapshot(runtime, sender, TTestSchema::BuildSnapshot(spec)); // Writes @@ -1189,16 +1123,31 @@ Y_UNIT_TEST_SUITE(TColumnShardTestSchema) { NTypeIds::Datetime }; - auto schema = TTestSchema::YdbSchema({"k0", TTypeInfo(NTypeIds::Timestamp)}); - auto pk = schema; - pk.resize(4); + TTestBasicRuntime runtime; + TTester::Setup(runtime); + auto csDefaultControllerGuard = NKikimr::NYDBTest::TControllers::RegisterCSControllerGuard(); + + using namespace NTxUT; + CreateTestBootstrapper(runtime, CreateTestTabletInfo(TTestTxConfig::TxTablet0, TTabletTypes::ColumnShard), &CreateColumnShard); + + TDispatchOptions options; + options.FinalEvents.push_back(TDispatchOptions::TFinalEventCondition(TEvTablet::EvBoot)); + runtime.DispatchEvents(options); + + TActorId sender = runtime.AllocateEdgeActor(); + + auto schema = TTestSchema::YdbSchema(NArrow::NTest::TTestColumn("k0", TTypeInfo(NTypeIds::Timestamp))); + auto pk = NArrow::NTest::TTestColumn::CropSchema(schema, 4); + + ui64 planStep = 1000; + ui64 txId = 100; + ui64 generation = 0; for (auto& ydbType : intTypes) { - schema[0].second = TTypeInfo(ydbType); - pk[0].second = TTypeInfo(ydbType); - auto txBody = 
TTestSchema::CreateTableTxBody(tableId, schema, pk); - bool ok = TestCreateTable(txBody); - UNIT_ASSERT(ok); + schema[0].SetType(TTypeInfo(ydbType)); + pk[0].SetType(TTypeInfo(ydbType)); + auto txBody = TTestSchema::CreateTableTxBody(tableId++, schema, pk, {}, ++generation); + SetupSchema(runtime, sender, txBody, NOlap::TSnapshot(planStep++, txId++)); } // TODO: support float types @@ -1208,11 +1157,10 @@ Y_UNIT_TEST_SUITE(TColumnShardTestSchema) { }; for (auto& ydbType : floatTypes) { - schema[0].second = TTypeInfo(ydbType); - pk[0].second = TTypeInfo(ydbType); - auto txBody = TTestSchema::CreateTableTxBody(tableId, schema, pk); - bool ok = TestCreateTable(txBody); - UNIT_ASSERT(!ok); + schema[0].SetType(TTypeInfo(ydbType)); + pk[0].SetType(TTypeInfo(ydbType)); + auto txBody = TTestSchema::CreateTableTxBody(tableId++, schema, pk, {}, ++generation); + SetupSchema(runtime, sender, txBody, NOlap::TSnapshot(planStep++, txId++), false); } std::vector strTypes = { @@ -1221,11 +1169,10 @@ Y_UNIT_TEST_SUITE(TColumnShardTestSchema) { }; for (auto& ydbType : strTypes) { - schema[0].second = TTypeInfo(ydbType); - pk[0].second = TTypeInfo(ydbType); - auto txBody = TTestSchema::CreateTableTxBody(tableId, schema, pk); - bool ok = TestCreateTable(txBody); - UNIT_ASSERT(ok); + schema[0].SetType(TTypeInfo(ydbType)); + pk[0].SetType(TTypeInfo(ydbType)); + auto txBody = TTestSchema::CreateTableTxBody(tableId++, schema, pk, {}, ++generation); + SetupSchema(runtime, sender, txBody, NOlap::TSnapshot(planStep++, txId++)); } std::vector xsonTypes = { @@ -1235,11 +1182,10 @@ Y_UNIT_TEST_SUITE(TColumnShardTestSchema) { }; for (auto& ydbType : xsonTypes) { - schema[0].second = TTypeInfo(ydbType); - pk[0].second = TTypeInfo(ydbType); - auto txBody = TTestSchema::CreateTableTxBody(tableId, schema, pk); - bool ok = TestCreateTable(txBody); - UNIT_ASSERT(!ok); + schema[0].SetType(TTypeInfo(ydbType)); + pk[0].SetType(TTypeInfo(ydbType)); + auto txBody = TTestSchema::CreateTableTxBody(tableId++, 
schema, pk, {}, ++generation); + SetupSchema(runtime, sender, txBody, NOlap::TSnapshot(planStep++, txId++), false); } } @@ -1250,8 +1196,8 @@ Y_UNIT_TEST_SUITE(TColumnShardTestSchema) { Y_UNIT_TEST(ExternalTTL_Types) { auto ydbSchema = testYdbSchema; for (auto typeId : {NTypeIds::Datetime, NTypeIds::Date, NTypeIds::Uint32, NTypeIds::Uint64}) { - UNIT_ASSERT_EQUAL(ydbSchema[8].first, "saved_at"); - ydbSchema[8].second = TTypeInfo(typeId); + UNIT_ASSERT_EQUAL(ydbSchema[8].GetName(), "saved_at"); + ydbSchema[8].SetType(TTypeInfo(typeId)); TTestSchema::TTableSpecials specs; specs.SetTtlColumn("saved_at"); @@ -1272,8 +1218,8 @@ Y_UNIT_TEST_SUITE(TColumnShardTestSchema) { Y_UNIT_TEST(InternalTTL_Types) { auto ydbSchema = testYdbSchema; for (auto typeId : {NTypeIds::Datetime, NTypeIds::Date, NTypeIds::Uint32, NTypeIds::Uint64}) { - UNIT_ASSERT_EQUAL(ydbSchema[8].first, "saved_at"); - ydbSchema[8].second = TTypeInfo(typeId); + UNIT_ASSERT_EQUAL(ydbSchema[8].GetName(), "saved_at"); + ydbSchema[8].SetType(TTypeInfo(typeId)); TTestSchema::TTableSpecials specs; specs.SetTtlColumn("saved_at"); @@ -1320,64 +1266,91 @@ Y_UNIT_TEST_SUITE(TColumnShardTestSchema) { // TODO: EnableOneTierAfterTtl, EnableTtlAfterOneTier Y_UNIT_TEST(HotTiers) { - TestTwoHotTiers(false, false); + TestTwoHotTiers(false, false, false); } Y_UNIT_TEST(RebootHotTiers) { - TestTwoHotTiers(true, false); + TestTwoHotTiers(true, false, false); + } + + Y_UNIT_TEST(HotTiersWithStat) { + TestTwoHotTiers(false, false, true); + } + + Y_UNIT_TEST(RebootHotTiersWithStat) { + TestTwoHotTiers(true, false, true); } Y_UNIT_TEST(HotTiersRevCompression) { - TestTwoHotTiers(false, false, EInitialEviction::None, true); + TestTwoHotTiers(false, false, false, EInitialEviction::None, true); } Y_UNIT_TEST(RebootHotTiersRevCompression) { - TestTwoHotTiers(true, false, EInitialEviction::None, true); + TestTwoHotTiers(true, false, false, EInitialEviction::None, true); } Y_UNIT_TEST(HotTiersTtl) { 
NColumnShard::gAllowLogBatchingDefaultValue = false; - TestTwoHotTiers(false, true); + TestTwoHotTiers(false, true, false); } Y_UNIT_TEST(RebootHotTiersTtl) { NColumnShard::gAllowLogBatchingDefaultValue = false; - TestTwoHotTiers(true, true); + TestTwoHotTiers(true, true, false); + } + + Y_UNIT_TEST(HotTiersTtlWithStat) { + NColumnShard::gAllowLogBatchingDefaultValue = false; + TestTwoHotTiers(false, true, true); + } + + Y_UNIT_TEST(RebootHotTiersTtlWithStat) { + NColumnShard::gAllowLogBatchingDefaultValue = false; + TestTwoHotTiers(true, true, true); } Y_UNIT_TEST(HotTiersAfterTtl) { - TestTwoHotTiers(false, false, EInitialEviction::Ttl); + TestTwoHotTiers(false, false, false, EInitialEviction::Ttl); } Y_UNIT_TEST(RebootHotTiersAfterTtl) { - TestTwoHotTiers(true, false, EInitialEviction::Ttl); + TestTwoHotTiers(true, false, false, EInitialEviction::Ttl); } // TODO: EnableTtlAfterHotTiers Y_UNIT_TEST(ColdTiers) { - TestHotAndColdTiers(false, EInitialEviction::Tiering); + TestHotAndColdTiers(false, EInitialEviction::Tiering, false); } Y_UNIT_TEST(RebootColdTiers) { //NColumnShard::gAllowLogBatchingDefaultValue = false; - TestHotAndColdTiers(true, EInitialEviction::Tiering); + TestHotAndColdTiers(true, EInitialEviction::Tiering, false); + } + + Y_UNIT_TEST(ColdTiersWithStat) { + TestHotAndColdTiers(false, EInitialEviction::Tiering, true); + } + + Y_UNIT_TEST(RebootColdTiersWithStat) { + //NColumnShard::gAllowLogBatchingDefaultValue = false; + TestHotAndColdTiers(true, EInitialEviction::Tiering, true); } Y_UNIT_TEST(EnableColdTiersAfterNoEviction) { - TestHotAndColdTiers(false, EInitialEviction::None); + TestHotAndColdTiers(false, EInitialEviction::None, false); } Y_UNIT_TEST(RebootEnableColdTiersAfterNoEviction) { - TestHotAndColdTiers(true, EInitialEviction::None); + TestHotAndColdTiers(true, EInitialEviction::None, false); } Y_UNIT_TEST(EnableColdTiersAfterTtl) { - TestHotAndColdTiers(false, EInitialEviction::Ttl); + TestHotAndColdTiers(false, 
EInitialEviction::Ttl, false); } Y_UNIT_TEST(RebootEnableColdTiersAfterTtl) { - TestHotAndColdTiers(true, EInitialEviction::Ttl); + TestHotAndColdTiers(true, EInitialEviction::Ttl, false); } Y_UNIT_TEST(OneColdTier) { diff --git a/ydb/core/tx/columnshard/ut_schema/ya.make b/ydb/core/tx/columnshard/ut_schema/ya.make index f94fea4e912b..d3b7fdd5d842 100644 --- a/ydb/core/tx/columnshard/ut_schema/ya.make +++ b/ydb/core/tx/columnshard/ut_schema/ya.make @@ -21,6 +21,7 @@ PEERDIR( ydb/core/testlib/default ydb/core/tx/columnshard/hooks/abstract ydb/core/tx/columnshard/hooks/testing + ydb/core/tx/columnshard/test_helper ydb/services/metadata ydb/core/tx ydb/public/lib/yson_value diff --git a/ydb/core/tx/columnshard/write_actor.cpp b/ydb/core/tx/columnshard/write_actor.cpp index 5d858afe6564..25cbb99b5915 100644 --- a/ydb/core/tx/columnshard/write_actor.cpp +++ b/ydb/core/tx/columnshard/write_actor.cpp @@ -36,12 +36,12 @@ class TWriteActor: public TActorBootstrapped, public TMonitoringObj if (status != NKikimrProto::OK) { ACFL_ERROR("event", "TEvPutResult")("blob_id", msg->Id.ToString())("status", status)("error", msg->ErrorReason); - WriteController->Abort(); + WriteController->Abort("cannot write blob " + msg->Id.ToString() + ", status: " + ::ToString(status) + ". 
reason: " + msg->ErrorReason); return SendResultAndDie(ctx, status); } WriteController->OnBlobWriteResult(*msg); - if (WriteController->IsBlobActionsReady()) { + if (WriteController->IsReady()) { return SendResultAndDie(ctx, NKikimrProto::OK); } } @@ -84,7 +84,7 @@ class TWriteActor: public TActorBootstrapped, public TMonitoringObj writeInfo->GetWriteOperator()->SendWriteBlobRequest(writeInfo->GetData(), writeInfo->GetBlobId()); } - if (WriteController->IsBlobActionsReady()) { + if (WriteController->IsReady()) { return SendResultAndDie(ctx, NKikimrProto::OK); } Become(&TThis::StateWait); diff --git a/ydb/core/tx/columnshard/ya.make b/ydb/core/tx/columnshard/ya.make index 924a67d6350e..2c8103b24329 100644 --- a/ydb/core/tx/columnshard/ya.make +++ b/ydb/core/tx/columnshard/ya.make @@ -4,17 +4,13 @@ SRCS( background_controller.cpp blob.cpp blob_cache.cpp - blob_manager.cpp columnshard__init.cpp columnshard__notify_tx_completion.cpp columnshard__plan_step.cpp columnshard__progress_tx.cpp columnshard__propose_cancel.cpp columnshard__propose_transaction.cpp - columnshard__read_base.cpp columnshard__scan.cpp - columnshard__index_scan.cpp - columnshard__stats_scan.cpp columnshard__write.cpp columnshard__write_index.cpp columnshard.cpp @@ -25,6 +21,7 @@ SRCS( columnshard_view.cpp counters.cpp defs.cpp + inflight_request_tracker.cpp write_actor.cpp tables_manager.cpp ) @@ -45,6 +42,7 @@ PEERDIR( ydb/core/tx/time_cast ydb/core/tx/columnshard/engines ydb/core/tx/columnshard/engines/writer + ydb/core/tx/columnshard/engines/reader/abstract ydb/core/tx/columnshard/counters ydb/core/tx/columnshard/common ydb/core/tx/columnshard/splitter @@ -53,9 +51,14 @@ PEERDIR( ydb/core/tx/columnshard/transactions/operators ydb/core/tx/columnshard/blobs_reader ydb/core/tx/columnshard/blobs_action + ydb/core/tx/columnshard/data_locks + ydb/core/tx/columnshard/data_sharing + ydb/core/tx/columnshard/export ydb/core/tx/columnshard/resource_subscriber ydb/core/tx/columnshard/normalizer/granule 
ydb/core/tx/columnshard/normalizer/portion + ydb/core/tx/columnshard/normalizer/tables + ydb/core/tx/columnshard/blobs_action/storages_manager ydb/core/tx/tiering ydb/core/tx/conveyor/usage ydb/core/tx/tracing diff --git a/ydb/core/tx/data_events/columnshard_splitter.h b/ydb/core/tx/data_events/columnshard_splitter.h index 9c2b2c47ad5d..fabc0d2c5d20 100644 --- a/ydb/core/tx/data_events/columnshard_splitter.h +++ b/ydb/core/tx/data_events/columnshard_splitter.h @@ -174,7 +174,7 @@ class TColumnShardShardsSplitter : public IShardsSplitter { col.HasTypeInfo() ? &col.GetTypeInfo() : nullptr); columns.emplace_back(col.GetName(), typeInfoMod.TypeInfo); } - return NArrow::MakeArrowSchema(columns); + return NArrow::TStatusValidator::GetValid(NArrow::MakeArrowSchema(columns)); } }; } diff --git a/ydb/core/tx/data_events/events.h b/ydb/core/tx/data_events/events.h index 8fab00c8699d..a875bfe3b1da 100644 --- a/ydb/core/tx/data_events/events.h +++ b/ydb/core/tx/data_events/events.h @@ -38,13 +38,29 @@ struct TDataEvents { public: TEvWrite() = default; - TEvWrite(ui64 txId, NKikimrDataEvents::TEvWrite::ETxMode txMode) { - Y_ABORT_UNLESS(txMode != NKikimrDataEvents::TEvWrite::MODE_UNSPECIFIED); + TEvWrite(const ui64 txId, NKikimrDataEvents::TEvWrite::ETxMode txMode) { + Y_ABORT_UNLESS(txMode != NKikimrDataEvents::TEvWrite::MODE_UNSPECIFIED); + Record.SetTxMode(txMode); Record.SetTxId(txId); + } + + TEvWrite(NKikimrDataEvents::TEvWrite::ETxMode txMode) { + Y_ABORT_UNLESS(txMode != NKikimrDataEvents::TEvWrite::MODE_UNSPECIFIED); Record.SetTxMode(txMode); } + TEvWrite& SetTxId(const ui64 txId) { + Record.SetTxId(txId); + return *this; + } + + TEvWrite& SetLockId(const ui64 lockTxId, const ui64 lockNodeId) { + Record.SetLockTxId(lockTxId); + Record.SetLockNodeId(lockNodeId); + return *this; + } + void AddOperation(NKikimrDataEvents::TEvWrite_TOperation::EOperationType operationType, const TTableId& tableId, const std::vector& columnIds, ui64 payloadIndex, 
NKikimrDataEvents::EDataFormat payloadFormat) { Y_ABORT_UNLESS(operationType != NKikimrDataEvents::TEvWrite::TOperation::OPERATION_UNSPECIFIED); @@ -87,7 +103,7 @@ struct TDataEvents { return result; } - static std::unique_ptr BuildCommited(const ui64 origin, const ui64 txId) { + static std::unique_ptr BuildCompleted(const ui64 origin, const ui64 txId) { auto result = std::make_unique(); result->Record.SetOrigin(origin); result->Record.SetTxId(txId); diff --git a/ydb/core/tx/data_events/payload_helper.h b/ydb/core/tx/data_events/payload_helper.h index 1905b60150c9..8d0b503b911c 100644 --- a/ydb/core/tx/data_events/payload_helper.h +++ b/ydb/core/tx/data_events/payload_helper.h @@ -4,20 +4,26 @@ namespace NKikimr::NEvWrite { -class IPayloadData { +class IPayloadReader { public: virtual TString GetDataFromPayload(const ui64 index) const = 0; + virtual ~IPayloadReader() { + } +}; + +class IPayloadWriter { +public: virtual ui64 AddDataToPayload(TString&& blobData) = 0; - virtual ~IPayloadData() { + virtual ~IPayloadWriter() { } }; template -class TPayloadHelper: public IPayloadData { - TEvent& Event; +class TPayloadReader: public IPayloadReader { + const TEvent& Event; public: - TPayloadHelper(TEvent& ev) + TPayloadReader(const TEvent& ev) : Event(ev) { } @@ -28,6 +34,17 @@ class TPayloadHelper: public IPayloadData { rope.Begin().ExtractPlainDataAndAdvance(data.Detach(), data.size()); return data; } +}; + +template +class TPayloadWriter: public IPayloadWriter { + TEvent& Event; + +public: + TPayloadWriter(TEvent& ev) + : Event(ev) + { + } ui64 AddDataToPayload(TString&& blobData) override { TRope rope; @@ -35,5 +52,4 @@ class TPayloadHelper: public IPayloadData { return Event.AddPayload(std::move(rope)); } }; - } diff --git a/ydb/core/tx/datashard/datashard__kqp_scan.cpp b/ydb/core/tx/datashard/datashard__kqp_scan.cpp index 603df1e0aa6b..e54fbd4b10cc 100644 --- a/ydb/core/tx/datashard/datashard__kqp_scan.cpp +++ b/ydb/core/tx/datashard/datashard__kqp_scan.cpp @@ -71,8 
+71,8 @@ class TKqpScan : public TActor, public NTable::IScan { schema.emplace_back(column.Name, column.Type); } BatchBuilder->Reserve(INIT_BATCH_ROWS); - bool started = BatchBuilder->Start(schema); - YQL_ENSURE(started, "Failed to start BatchBuilder"); + auto started = BatchBuilder->Start(schema); + YQL_ENSURE(started.ok(), "Failed to start BatchBuilder: " + started.ToString()); } } @@ -436,7 +436,7 @@ class TKqpScan : public TActor, public NTable::IScan { if (DataFormat == NKikimrDataEvents::FORMAT_ARROW) { FlushBatchToResult(); - sendBytes = NArrow::GetBatchDataSize(Result->ArrowBatch); + sendBytes = NArrow::GetTableDataSize(Result->ArrowBatch); // Batch is stored inside BatchBuilder until we flush it into Result. So we verify number of rows here. YQL_ENSURE(Rows == 0 && Result->ArrowBatch == nullptr || Result->ArrowBatch->num_rows() == (i64) Rows); } else { @@ -489,7 +489,7 @@ class TKqpScan : public TActor, public NTable::IScan { // send a batch and try to send an empty batch again without adding rows, then a copy of the batch will be send // instead. So we check Rows here. if (Rows != 0) { - Result->ArrowBatch = Tags.empty() ? NArrow::CreateNoColumnsBatch(Rows) : BatchBuilder->FlushBatch(true); + Result->ArrowBatch = NArrow::TStatusValidator::GetValid(arrow::Table::FromRecordBatches({Tags.empty() ? 
NArrow::CreateNoColumnsBatch(Rows) : BatchBuilder->FlushBatch(true)})); } } diff --git a/ydb/core/tx/datashard/datashard_ut_read_iterator.cpp b/ydb/core/tx/datashard/datashard_ut_read_iterator.cpp index c2b2febb0067..940f4179aa76 100644 --- a/ydb/core/tx/datashard/datashard_ut_read_iterator.cpp +++ b/ydb/core/tx/datashard/datashard_ut_read_iterator.cpp @@ -760,7 +760,7 @@ struct TTestHelper { TSerializedCellMatrix matrix(cells, 1, columnCount); auto evWrite = std::make_unique(txId, txMode); - ui64 payloadIndex = NKikimr::NEvWrite::TPayloadHelper(*evWrite).AddDataToPayload(matrix.ReleaseBuffer()); + ui64 payloadIndex = NKikimr::NEvWrite::TPayloadWriter(*evWrite).AddDataToPayload(matrix.ReleaseBuffer()); evWrite->AddOperation(NKikimrDataEvents::TEvWrite::TOperation::OPERATION_UPSERT, table.TableId, columnIds, payloadIndex, NKikimrDataEvents::FORMAT_CELLVEC); return Write(*Server->GetRuntime(), Sender, table.TabletId, std::move(evWrite)); diff --git a/ydb/core/tx/datashard/datashard_ut_write.cpp b/ydb/core/tx/datashard/datashard_ut_write.cpp index b2a6a33c8740..55f8dcd2865a 100644 --- a/ydb/core/tx/datashard/datashard_ut_write.cpp +++ b/ydb/core/tx/datashard/datashard_ut_write.cpp @@ -94,7 +94,7 @@ Y_UNIT_TEST_SUITE(DataShardWrite) { TSerializedCellMatrix matrix({TCell(hugeStringValue.c_str(), hugeStringValue.size())}, 1, 1); auto evWrite = std::make_unique(100, NKikimrDataEvents::TEvWrite::MODE_IMMEDIATE); - ui64 payloadIndex = NKikimr::NEvWrite::TPayloadHelper(*evWrite).AddDataToPayload(matrix.ReleaseBuffer()); + ui64 payloadIndex = NKikimr::NEvWrite::TPayloadWriter(*evWrite).AddDataToPayload(matrix.ReleaseBuffer()); evWrite->AddOperation(NKikimrDataEvents::TEvWrite::TOperation::OPERATION_UPSERT, tableId, {1}, payloadIndex, NKikimrDataEvents::FORMAT_CELLVEC); const auto& record = Write(runtime, sender, shards[0], std::move(evWrite), NKikimrDataEvents::TEvWriteResult::STATUS_BAD_REQUEST); diff --git a/ydb/core/tx/datashard/datashard_write_operation.cpp 
b/ydb/core/tx/datashard/datashard_write_operation.cpp index 5bd48c0a0311..5ebb2dfc4102 100644 --- a/ydb/core/tx/datashard/datashard_write_operation.cpp +++ b/ydb/core/tx/datashard/datashard_write_operation.cpp @@ -89,8 +89,8 @@ bool TValidatedWriteTx::ParseRecord(const TDataShard::TTableInfos& tableInfos) { return false; } - NEvWrite::TPayloadHelper payloadHelper(*Ev->Get()); - TString payload = payloadHelper.GetDataFromPayload(RecordOperation().GetPayloadIndex()); + NEvWrite::TPayloadReader payloadReader(*Ev->Get()); + TString payload = payloadReader.GetDataFromPayload(RecordOperation().GetPayloadIndex()); if (!TSerializedCellMatrix::TryParse(payload,Matrix)) { diff --git a/ydb/core/tx/datashard/ut_common/datashard_ut_common.cpp b/ydb/core/tx/datashard/ut_common/datashard_ut_common.cpp index f79a3de14c25..6e2bc0886f64 100644 --- a/ydb/core/tx/datashard/ut_common/datashard_ut_common.cpp +++ b/ydb/core/tx/datashard/ut_common/datashard_ut_common.cpp @@ -1873,7 +1873,7 @@ std::unique_ptr MakeWriteRequest(ui64 txId, NKik UNIT_ASSERT(blobData.size() < 8_MB); auto evWrite = std::make_unique(txId, txMode); - ui64 payloadIndex = NKikimr::NEvWrite::TPayloadHelper(*evWrite).AddDataToPayload(std::move(blobData)); + ui64 payloadIndex = NKikimr::NEvWrite::TPayloadWriter(*evWrite).AddDataToPayload(std::move(blobData)); evWrite->AddOperation(NKikimrDataEvents::TEvWrite::TOperation::OPERATION_UPSERT, tableId, columnIds, payloadIndex, NKikimrDataEvents::FORMAT_CELLVEC); return evWrite; @@ -1974,7 +1974,7 @@ TTestActorRuntimeBase::TEventObserverHolderPair ReplaceEvProposeTransactionWithE std::iota(columnIds.begin(), columnIds.end(), 1); auto evWrite = std::make_unique(txId, txMode); - ui64 payloadIndex = NKikimr::NEvWrite::TPayloadHelper(*evWrite).AddDataToPayload(std::move(blobData)); + ui64 payloadIndex = NKikimr::NEvWrite::TPayloadWriter(*evWrite).AddDataToPayload(std::move(blobData)); evWrite->AddOperation(NKikimrDataEvents::TEvWrite::TOperation::OPERATION_UPSERT, tableId, 
columnIds, payloadIndex, NKikimrDataEvents::FORMAT_CELLVEC); // Copy locks diff --git a/ydb/core/tx/datashard/write_unit.cpp b/ydb/core/tx/datashard/write_unit.cpp index 15d4065b279c..c2fd1d0c174a 100644 --- a/ydb/core/tx/datashard/write_unit.cpp +++ b/ydb/core/tx/datashard/write_unit.cpp @@ -97,7 +97,7 @@ class TWriteUnit : public TExecutionUnit { self->IncCounter(COUNTER_WRITE_ROWS, matrix.GetRowCount()); self->IncCounter(COUNTER_WRITE_BYTES, matrix.GetBuffer().size()); - writeOp->SetWriteResult(NEvents::TDataEvents::TEvWriteResult::BuildCommited(self->TabletID(), writeOp->GetTxId())); + writeOp->SetWriteResult(NEvents::TDataEvents::TEvWriteResult::BuildCompleted(self->TabletID(), writeOp->GetTxId())); LOG_DEBUG_S(ctx, NKikimrServices::TX_DATASHARD, "Executed write operation for " << *writeOp << " at " << self->TabletID()); } diff --git a/ydb/core/tx/program/program.h b/ydb/core/tx/program/program.h index dae9ae4d94a4..dfeb714a1481 100644 --- a/ydb/core/tx/program/program.h +++ b/ydb/core/tx/program/program.h @@ -90,15 +90,8 @@ class TProgramContainer { } } - std::shared_ptr ApplyEarlyFilter(std::shared_ptr& batch, const bool useFilter) const { - if (Program) { - return Program->ApplyEarlyFilter(batch, useFilter); - } else { - return nullptr; - } - } - - inline arrow::Status ApplyProgram(std::shared_ptr& batch) const { + template + inline arrow::Status ApplyProgram(std::shared_ptr& batch) const { if (Program) { return Program->ApplyTo(batch, NArrow::GetCustomExecContext()); } else if (OverrideProcessingColumnsVector) { diff --git a/ydb/core/tx/schemeshard/common/validation.cpp b/ydb/core/tx/schemeshard/common/validation.cpp new file mode 100644 index 000000000000..51615e4e1119 --- /dev/null +++ b/ydb/core/tx/schemeshard/common/validation.cpp @@ -0,0 +1,30 @@ +#include "validation.h" + +namespace NKikimr::NSchemeShard::NValidation { + +bool TTTLValidator::ValidateUnit(const NScheme::TTypeId columnType, NKikimrSchemeOp::TTTLSettings::EUnit unit, TString& errStr) { 
+ switch (columnType) { + case NScheme::NTypeIds::Date: + case NScheme::NTypeIds::Datetime: + case NScheme::NTypeIds::Timestamp: + if (unit != NKikimrSchemeOp::TTTLSettings::UNIT_AUTO) { + errStr = "To enable TTL on date type column 'DateTypeColumnModeSettings' should be specified"; + return false; + } + break; + case NScheme::NTypeIds::Uint32: + case NScheme::NTypeIds::Uint64: + case NScheme::NTypeIds::DyNumber: + if (unit == NKikimrSchemeOp::TTTLSettings::UNIT_AUTO) { + errStr = "To enable TTL on integral type column 'ValueSinceUnixEpochModeSettings' should be specified"; + return false; + } + break; + default: + errStr = "Unsupported column type"; + return false; + } + return true; +} + +} \ No newline at end of file diff --git a/ydb/core/tx/schemeshard/common/validation.h b/ydb/core/tx/schemeshard/common/validation.h new file mode 100644 index 000000000000..12d1e801e2ad --- /dev/null +++ b/ydb/core/tx/schemeshard/common/validation.h @@ -0,0 +1,13 @@ +#pragma once +#include +#include + +#include + +namespace NKikimr::NSchemeShard::NValidation { + +class TTTLValidator { +public: + static bool ValidateUnit(const NScheme::TTypeId columnType, NKikimrSchemeOp::TTTLSettings::EUnit unit, TString& errStr); +}; +} \ No newline at end of file diff --git a/ydb/core/tx/schemeshard/common/ya.make b/ydb/core/tx/schemeshard/common/ya.make new file mode 100644 index 000000000000..179f7adf4b9a --- /dev/null +++ b/ydb/core/tx/schemeshard/common/ya.make @@ -0,0 +1,12 @@ +LIBRARY() + +SRCS( + validation.cpp +) + +PEERDIR( + ydb/core/protos + ydb/public/lib/scheme_types +) + +END() diff --git a/ydb/core/tx/schemeshard/olap/columns/update.cpp b/ydb/core/tx/schemeshard/olap/columns/update.cpp index b84327227531..16d2bd448b66 100644 --- a/ydb/core/tx/schemeshard/olap/columns/update.cpp +++ b/ydb/core/tx/schemeshard/olap/columns/update.cpp @@ -3,6 +3,7 @@ #include #include #include +#include namespace NKikimr::NSchemeShard { @@ -14,6 +15,7 @@ namespace NKikimr::NSchemeShard { Name = 
columnSchema.GetName(); NotNullFlag = columnSchema.GetNotNull(); TypeName = columnSchema.GetType(); + StorageId = columnSchema.GetStorageId(); if (columnSchema.HasSerializer()) { NArrow::NSerialization::TSerializerContainer serializer; if (!serializer.DeserializeFromProto(columnSchema.GetSerializer())) { @@ -41,20 +43,33 @@ namespace NKikimr::NSchemeShard { return false; } - auto typeName = NMiniKQL::AdaptLegacyYqlType(TypeName); - Y_ABORT_UNLESS(AppData()->TypeRegistry); - const NScheme::IType* type = AppData()->TypeRegistry->GetType(typeName); - if (!type) { - errors.AddError(TStringBuilder() << "Type '" << typeName << "' specified for column '" << Name << "' is not supported"); - return false; - } - if (!NScheme::NTypeIds::IsYqlType(type->GetTypeId())) { - errors.AddError(TStringBuilder() << "Type '" << typeName << "' specified for column '" << Name << "' is not supported"); - return false;; + if (const auto& typeName = NMiniKQL::AdaptLegacyYqlType(TypeName); typeName.StartsWith("pg")) { + const auto typeDesc = NPg::TypeDescFromPgTypeName(typeName); + if (!(typeDesc && TOlapColumnAdd::IsAllowedPgType(NPg::PgTypeIdFromTypeDesc(typeDesc)))) { + errors.AddError(TStringBuilder() << "Type '" << typeName << "' specified for column '" << Name << "' is not supported"); + return false; + } + Type = NScheme::TTypeInfo(NScheme::NTypeIds::Pg, typeDesc); + } else { + Y_ABORT_UNLESS(AppData()->TypeRegistry); + const NScheme::IType* type = AppData()->TypeRegistry->GetType(typeName); + if (!type) { + errors.AddError(TStringBuilder() << "Type '" << typeName << "' specified for column '" << Name << "' is not supported"); + return false; + } + if (!NScheme::NTypeIds::IsYqlType(type->GetTypeId())) { + errors.AddError(TStringBuilder() << "Type '" << typeName << "' specified for column '" << Name << "' is not supported"); + return false;; + } + Type = NScheme::TTypeInfo(type->GetTypeId()); + if (!IsAllowedType(type->GetTypeId())){ + errors.AddError(TStringBuilder() << "Type '" << 
typeName << "' specified for column '" << Name << "' is not supported"); + return false; + } } - Type = NScheme::TTypeInfo(type->GetTypeId()); - if (!IsAllowedType(type->GetTypeId())){ - errors.AddError(TStringBuilder() << "Type '" << typeName << "' specified for column '" << Name << "' is not supported"); + const auto arrowTypeStatus = NArrow::GetArrowType(Type).status(); + if (!arrowTypeStatus.ok()) { + errors.AddError(TStringBuilder() << "Column '" << Name << "': " << arrowTypeStatus.ToString()); return false; } return true; @@ -63,6 +78,7 @@ namespace NKikimr::NSchemeShard { void TOlapColumnAdd::ParseFromLocalDB(const NKikimrSchemeOp::TOlapColumnDescription& columnSchema) { Name = columnSchema.GetName(); TypeName = columnSchema.GetType(); + StorageId = columnSchema.GetStorageId(); if (columnSchema.HasTypeInfo()) { Type = NScheme::TypeInfoModFromProtoColumnType( @@ -79,7 +95,7 @@ namespace NKikimr::NSchemeShard { Serializer = serializer; } else if (columnSchema.HasCompression()) { NArrow::NSerialization::TSerializerContainer serializer; - AFL_VERIFY(serializer.DeserializeFromProto(columnSchema.GetCompression())); + serializer.DeserializeFromProto(columnSchema.GetCompression()).Validate(); Serializer = serializer; } if (columnSchema.HasDictionaryEncoding()) { @@ -90,10 +106,15 @@ namespace NKikimr::NSchemeShard { NotNullFlag = columnSchema.GetNotNull(); } + bool TOlapColumnAdd::IsAllowedPgType(ui32 /*pgTypeId*/) { + return false; + } + void TOlapColumnAdd::Serialize(NKikimrSchemeOp::TOlapColumnDescription& columnSchema) const { columnSchema.SetName(Name); columnSchema.SetType(TypeName); columnSchema.SetNotNull(NotNullFlag); + columnSchema.SetStorageId(StorageId); if (Serializer) { Serializer->SerializeToProto(*columnSchema.MutableSerializer()); } @@ -110,6 +131,9 @@ namespace NKikimr::NSchemeShard { bool TOlapColumnAdd::ApplyDiff(const TOlapColumnDiff& diffColumn, IErrorCollector& errors) { Y_ABORT_UNLESS(GetName() == diffColumn.GetName()); + if 
(diffColumn.GetStorageId()) { + StorageId = *diffColumn.GetStorageId(); + } if (diffColumn.GetSerializer()) { Serializer = diffColumn.GetSerializer(); } @@ -139,9 +163,12 @@ namespace NKikimr::NSchemeShard { return true; } - bool TOlapColumnAdd::IsAllowedFirstPkType(ui32 typeId) { + bool TOlapColumnAdd::IsAllowedPkType(ui32 typeId) { switch (typeId) { + case NYql::NProto::Int8: case NYql::NProto::Uint8: // Byte + case NYql::NProto::Int16: + case NYql::NProto::Uint16: case NYql::NProto::Int32: case NYql::NProto::Uint32: case NYql::NProto::Int64: @@ -153,19 +180,9 @@ namespace NKikimr::NSchemeShard { case NYql::NProto::Timestamp: case NYql::NProto::Decimal: return true; - case NYql::NProto::Interval: - case NYql::NProto::DyNumber: - case NYql::NProto::Yson: - case NYql::NProto::Json: - case NYql::NProto::JsonDocument: - case NYql::NProto::Float: - case NYql::NProto::Double: - case NYql::NProto::Bool: - return false; default: - break; + return false; } - return false; } bool TOlapColumnsUpdate::Parse(const NKikimrSchemeOp::TAlterColumnTableSchema& alterRequest, IErrorCollector& errors) { @@ -232,11 +249,11 @@ namespace NKikimr::NSchemeShard { if (!column.ParseFromRequest(columnSchema, errors)) { return false; } - if (column.GetKeyOrder() && *column.GetKeyOrder() == 0) { - if (!TOlapColumnAdd::IsAllowedFirstPkType(column.GetType().GetTypeId())) { + if (column.IsKeyColumn()) { + if (!TOlapColumnAdd::IsAllowedPkType(column.GetType().GetTypeId())) { errors.AddError(NKikimrScheme::StatusSchemeError, TStringBuilder() - << "Type '" << column.GetType().GetTypeId() << "' specified for column '" << column.GetName() - << "' is not supported in first PK position"); + << "Type '" << column.GetTypeName() << "' specified for column '" << column.GetName() + << "' is not supported as primary key"); return false; } } diff --git a/ydb/core/tx/schemeshard/olap/columns/update.h b/ydb/core/tx/schemeshard/olap/columns/update.h index 26eb18a971af..1a29d648788e 100644 --- 
a/ydb/core/tx/schemeshard/olap/columns/update.h +++ b/ydb/core/tx/schemeshard/olap/columns/update.h @@ -14,9 +14,13 @@ class TOlapColumnDiff { YDB_READONLY_DEF(TString, Name); YDB_READONLY_DEF(NArrow::NSerialization::TSerializerContainer, Serializer); YDB_READONLY_DEF(NArrow::NDictionary::TEncodingDiff, DictionaryEncoding); + YDB_READONLY_DEF(std::optional, StorageId); public: bool ParseFromRequest(const NKikimrSchemeOp::TOlapColumnDiff& columnSchema, IErrorCollector& errors) { Name = columnSchema.GetName(); + if (!!columnSchema.GetStorageId()) { + StorageId = columnSchema.GetStorageId(); + } if (!Name) { errors.AddError("empty field name"); return false; @@ -41,6 +45,7 @@ class TOlapColumnAdd { YDB_READONLY_DEF(TString, Name); YDB_READONLY_DEF(TString, TypeName); YDB_READONLY_DEF(NScheme::TTypeInfo, Type); + YDB_READONLY_DEF(TString, StorageId); YDB_FLAG_ACCESSOR(NotNull, false); YDB_READONLY_DEF(std::optional, Serializer); YDB_READONLY_DEF(std::optional, DictionaryEncoding); @@ -57,7 +62,8 @@ class TOlapColumnAdd { return !!KeyOrder; } static bool IsAllowedType(ui32 typeId); - static bool IsAllowedFirstPkType(ui32 typeId); + static bool IsAllowedPkType(ui32 typeId); + static bool IsAllowedPgType(ui32 pgTypeId); }; class TOlapColumnsUpdate { diff --git a/ydb/core/tx/schemeshard/olap/indexes/schema.cpp b/ydb/core/tx/schemeshard/olap/indexes/schema.cpp index 4275e883711d..0f31bf0e2ede 100644 --- a/ydb/core/tx/schemeshard/olap/indexes/schema.cpp +++ b/ydb/core/tx/schemeshard/olap/indexes/schema.cpp @@ -6,12 +6,14 @@ namespace NKikimr::NSchemeShard { void TOlapIndexSchema::SerializeToProto(NKikimrSchemeOp::TOlapIndexDescription& indexSchema) const { indexSchema.SetId(Id); indexSchema.SetName(Name); + indexSchema.SetStorageId(StorageId); IndexMeta.SerializeToProto(indexSchema); } void TOlapIndexSchema::DeserializeFromProto(const NKikimrSchemeOp::TOlapIndexDescription& indexSchema) { Id = indexSchema.GetId(); Name = indexSchema.GetName(); + StorageId = 
indexSchema.GetStorageId(); AFL_VERIFY(IndexMeta.DeserializeFromProto(indexSchema))("incorrect_proto", indexSchema.DebugString()); } @@ -22,6 +24,9 @@ bool TOlapIndexSchema::ApplyUpdate(const TOlapSchema& currentSchema, const TOlap errors.AddError("different index classes: " + upsert.GetIndexConstructor().GetClassName() + " vs " + IndexMeta.GetClassName()); return false; } + if (upsert.GetStorageId()) { + StorageId = *upsert.GetStorageId(); + } auto object = upsert.GetIndexConstructor()->CreateIndexMeta(GetId(), GetName(), currentSchema, errors); if (!object) { return false; diff --git a/ydb/core/tx/schemeshard/olap/indexes/schema.h b/ydb/core/tx/schemeshard/olap/indexes/schema.h index 630016fe96a5..1aa302ecb826 100644 --- a/ydb/core/tx/schemeshard/olap/indexes/schema.h +++ b/ydb/core/tx/schemeshard/olap/indexes/schema.h @@ -10,6 +10,7 @@ class TOlapIndexSchema { using TBase = TOlapIndexUpsert; YDB_READONLY(ui32, Id, Max()); YDB_READONLY_DEF(TString, Name); + YDB_READONLY_DEF(TString, StorageId); YDB_READONLY_DEF(NBackgroundTasks::TInterfaceProtoContainer, IndexMeta); public: TOlapIndexSchema() = default; diff --git a/ydb/core/tx/schemeshard/olap/indexes/update.cpp b/ydb/core/tx/schemeshard/olap/indexes/update.cpp index 596cba4b835c..727a21e7fae8 100644 --- a/ydb/core/tx/schemeshard/olap/indexes/update.cpp +++ b/ydb/core/tx/schemeshard/olap/indexes/update.cpp @@ -4,12 +4,19 @@ namespace NKikimr::NSchemeShard { void TOlapIndexUpsert::SerializeToProto(NKikimrSchemeOp::TOlapIndexRequested& requestedProto) const { requestedProto.SetName(Name); + if (StorageId && !!*StorageId) { + requestedProto.SetStorageId(*StorageId); + } IndexConstructor.SerializeToProto(requestedProto); } -void TOlapIndexUpsert::DeserializeFromProto(const NKikimrSchemeOp::TOlapIndexRequested& indexSchema) { +bool TOlapIndexUpsert::DeserializeFromProto(const NKikimrSchemeOp::TOlapIndexRequested& indexSchema) { Name = indexSchema.GetName(); + if (!!indexSchema.GetStorageId()) { + StorageId = 
indexSchema.GetStorageId(); + } AFL_VERIFY(IndexConstructor.DeserializeFromProto(indexSchema))("incorrect_proto", indexSchema.DebugString()); + return true; } bool TOlapIndexesUpdate::Parse(const NKikimrSchemeOp::TAlterColumnTableSchema& alterRequest, IErrorCollector& errors) { @@ -22,7 +29,7 @@ bool TOlapIndexesUpdate::Parse(const NKikimrSchemeOp::TAlterColumnTableSchema& a TSet upsertIndexNames; for (auto& indexSchema : alterRequest.GetUpsertIndexes()) { TOlapIndexUpsert index; - index.DeserializeFromProto(indexSchema); + AFL_VERIFY(index.DeserializeFromProto(indexSchema)); if (!upsertIndexNames.emplace(index.GetName()).second) { errors.AddError(NKikimrScheme::StatusAlreadyExists, TStringBuilder() << "index '" << index.GetName() << "' duplication for add"); return false; diff --git a/ydb/core/tx/schemeshard/olap/indexes/update.h b/ydb/core/tx/schemeshard/olap/indexes/update.h index f6d0f88fa312..4350c175b306 100644 --- a/ydb/core/tx/schemeshard/olap/indexes/update.h +++ b/ydb/core/tx/schemeshard/olap/indexes/update.h @@ -11,6 +11,7 @@ namespace NKikimr::NSchemeShard { private: YDB_READONLY_DEF(TString, Name); YDB_READONLY_DEF(TString, TypeName); + YDB_READONLY_DEF(std::optional, StorageId); protected: NBackgroundTasks::TInterfaceProtoContainer IndexConstructor; public: @@ -20,7 +21,7 @@ namespace NKikimr::NSchemeShard { return IndexConstructor; } - void DeserializeFromProto(const NKikimrSchemeOp::TOlapIndexRequested& requestedProto); + bool DeserializeFromProto(const NKikimrSchemeOp::TOlapIndexRequested& requestedProto); void SerializeToProto(NKikimrSchemeOp::TOlapIndexRequested& requestedProto) const; }; diff --git a/ydb/core/tx/schemeshard/olap/options/schema.cpp b/ydb/core/tx/schemeshard/olap/options/schema.cpp new file mode 100644 index 000000000000..f9bb47cdce0f --- /dev/null +++ b/ydb/core/tx/schemeshard/olap/options/schema.cpp @@ -0,0 +1,22 @@ +#include "schema.h" + +namespace NKikimr::NSchemeShard { + +bool TOlapOptionsDescription::ApplyUpdate(const 
TOlapOptionsUpdate& schemaUpdate, IErrorCollector& /*errors*/) { + SchemeNeedActualization = schemaUpdate.GetSchemeNeedActualization(); + return true; +} + +void TOlapOptionsDescription::Parse(const NKikimrSchemeOp::TColumnTableSchema& tableSchema) { + SchemeNeedActualization = tableSchema.GetOptions().GetSchemeNeedActualization(); +} + +void TOlapOptionsDescription::Serialize(NKikimrSchemeOp::TColumnTableSchema& tableSchema) const { + tableSchema.MutableOptions()->SetSchemeNeedActualization(SchemeNeedActualization); +} + +bool TOlapOptionsDescription::Validate(const NKikimrSchemeOp::TColumnTableSchema& /*opSchema*/, IErrorCollector& /*errors*/) const { + return true; +} + +} diff --git a/ydb/core/tx/schemeshard/olap/options/schema.h b/ydb/core/tx/schemeshard/olap/options/schema.h new file mode 100644 index 000000000000..ecaa79f69577 --- /dev/null +++ b/ydb/core/tx/schemeshard/olap/options/schema.h @@ -0,0 +1,18 @@ +#pragma once +#include "update.h" + +namespace NKikimr::NSchemeShard { + +class TOlapSchema; + +class TOlapOptionsDescription { +private: + YDB_READONLY(bool, SchemeNeedActualization, false); +public: + bool ApplyUpdate(const TOlapOptionsUpdate& schemaUpdate, IErrorCollector& errors); + + void Parse(const NKikimrSchemeOp::TColumnTableSchema& tableSchema); + void Serialize(NKikimrSchemeOp::TColumnTableSchema& tableSchema) const; + bool Validate(const NKikimrSchemeOp::TColumnTableSchema& opSchema, IErrorCollector& errors) const; +}; +} diff --git a/ydb/core/tx/schemeshard/olap/options/update.cpp b/ydb/core/tx/schemeshard/olap/options/update.cpp new file mode 100644 index 000000000000..1080546ffcc0 --- /dev/null +++ b/ydb/core/tx/schemeshard/olap/options/update.cpp @@ -0,0 +1,5 @@ +#include "update.h" + +namespace NKikimr::NSchemeShard { + +} diff --git a/ydb/core/tx/schemeshard/olap/options/update.h b/ydb/core/tx/schemeshard/olap/options/update.h new file mode 100644 index 000000000000..8e8afcf27359 --- /dev/null +++ 
b/ydb/core/tx/schemeshard/olap/options/update.h @@ -0,0 +1,22 @@ +#pragma once +#include + +#include + +#include + +namespace NKikimr::NSchemeShard { + + class TOlapOptionsUpdate { + private: + YDB_ACCESSOR(bool, SchemeNeedActualization, false); + public: + bool Parse(const NKikimrSchemeOp::TAlterColumnTableSchema& alterRequest, IErrorCollector& /*errors*/) { + SchemeNeedActualization = alterRequest.GetOptions().GetSchemeNeedActualization(); + return true; + } + void SerializeToProto(NKikimrSchemeOp::TAlterColumnTableSchema& alterRequest) const { + alterRequest.MutableOptions()->SetSchemeNeedActualization(SchemeNeedActualization); + } + }; +} diff --git a/ydb/core/tx/schemeshard/olap/options/ya.make b/ydb/core/tx/schemeshard/olap/options/ya.make new file mode 100644 index 000000000000..0303a9692f52 --- /dev/null +++ b/ydb/core/tx/schemeshard/olap/options/ya.make @@ -0,0 +1,12 @@ +LIBRARY() + +SRCS( + schema.cpp + update.cpp +) + +PEERDIR( + ydb/services/bg_tasks/abstract +) + +END() diff --git a/ydb/core/tx/schemeshard/olap/schema/schema.cpp b/ydb/core/tx/schemeshard/olap/schema/schema.cpp index ff7ba1bbe4ca..48a2f4ed6d29 100644 --- a/ydb/core/tx/schemeshard/olap/schema/schema.cpp +++ b/ydb/core/tx/schemeshard/olap/schema/schema.cpp @@ -1,93 +1,212 @@ #include "schema.h" +#include +#include namespace NKikimr::NSchemeShard { - bool TOlapSchema::Update(const TOlapSchemaUpdate& schemaUpdate, IErrorCollector& errors) { - if (!Columns.ApplyUpdate(schemaUpdate.GetColumns(), errors, NextColumnId)) { - return false; - } +namespace { +static inline bool IsDropped(const TOlapColumnsDescription::TColumn& col) { + Y_UNUSED(col); + return false; +} + +static inline ui32 GetType(const TOlapColumnsDescription::TColumn& col) { + Y_ABORT_UNLESS(col.GetType().GetTypeId() != NScheme::NTypeIds::Pg, "pg types are not supported"); + return col.GetType().GetTypeId(); +} + +} + +static bool ValidateColumnTableTtl(const NKikimrSchemeOp::TColumnDataLifeCycle::TTtl& ttl, + const THashMap& 
sourceColumns, + const THashMap& alterColumns, + const THashMap& colName2Id, + IErrorCollector& errors) { + const TString colName = ttl.GetColumnName(); + + auto it = colName2Id.find(colName); + if (it == colName2Id.end()) { + errors.AddError(Sprintf("Cannot enable TTL on unknown column: '%s'", colName.data())); + return false; + } + + const TOlapColumnsDescription::TColumn* column = nullptr; + const ui32 colId = it->second; + if (alterColumns.contains(colId)) { + column = &alterColumns.at(colId); + } else if (sourceColumns.contains(colId)) { + column = &sourceColumns.at(colId); + } else { + Y_ABORT_UNLESS("Unknown column"); + } + + if (IsDropped(*column)) { + errors.AddError(Sprintf("Cannot enable TTL on dropped column: '%s'", colName.data())); + return false; + } + + if (ttl.HasExpireAfterBytes()) { + errors.AddError("TTL with eviction by size is not supported yet"); + return false; + } + + if (!ttl.HasExpireAfterSeconds()) { + errors.AddError("TTL without eviction time"); + return false; + } + + auto unit = ttl.GetColumnUnit(); - if (!Indexes.ApplyUpdate(*this, schemaUpdate.GetIndexes(), errors, NextColumnId)) { + switch (GetType(*column)) { + case NScheme::NTypeIds::DyNumber: + errors.AddError("Unsupported column type for TTL in column tables"); return false; - } + default: + break; + } + + TString errStr; + if (!NValidation::TTTLValidator::ValidateUnit(GetType(*column), unit, errStr)) { + errors.AddError(errStr); + return false; + } + return true; +} - if (!HasEngine()) { - Engine = schemaUpdate.GetEngineDef(NKikimrSchemeOp::COLUMN_ENGINE_REPLACING_TIMESERIES); - } else { - if (schemaUpdate.HasEngine()) { - errors.AddError(NKikimrScheme::StatusSchemeError, "No engine updates supported"); +bool TOlapSchema::ValidateTtlSettings(const NKikimrSchemeOp::TColumnDataLifeCycle& ttl, IErrorCollector& errors) const { + using TTtlProto = NKikimrSchemeOp::TColumnDataLifeCycle; + switch (ttl.GetStatusCase()) { + case TTtlProto::kEnabled: + { + const auto* column = 
Columns.GetByName(ttl.GetEnabled().GetColumnName()); + if (!column) { + errors.AddError("Incorrect ttl column - not found in scheme"); return false; } + if (!Statistics.GetByIdOptional(NOlap::NStatistics::EType::Max, {column->GetId()})) { + TOlapStatisticsModification modification; + NOlap::NStatistics::TConstructorContainer container(std::make_shared(column->GetName())); + modification.AddUpsert("__TTL_PROVIDER::" + TGUID::CreateTimebased().AsUuidString(), container); + if (!Statistics.ApplyUpdate(*this, modification, errors)) { + return false; + } + } + return ValidateColumnTableTtl(ttl.GetEnabled(), {}, Columns.GetColumns(), Columns.GetColumnsByName(), errors); } - - ++Version; - return true; + case TTtlProto::kDisabled: + default: + break; } - void TOlapSchema::ParseFromLocalDB(const NKikimrSchemeOp::TColumnTableSchema& tableSchema) { - NextColumnId = tableSchema.GetNextColumnId(); - Version = tableSchema.GetVersion(); - Y_ABORT_UNLESS(tableSchema.HasEngine()); - Engine = tableSchema.GetEngine(); - CompositeMarksFlag = tableSchema.GetCompositeMarks(); + return true; +} - Columns.Parse(tableSchema); - Indexes.Parse(tableSchema); +bool TOlapSchema::Update(const TOlapSchemaUpdate& schemaUpdate, IErrorCollector& errors) { + if (!Columns.ApplyUpdate(schemaUpdate.GetColumns(), errors, NextColumnId)) { + return false; } - void TOlapSchema::Serialize(NKikimrSchemeOp::TColumnTableSchema& tableSchema) const { - tableSchema.SetNextColumnId(NextColumnId); - tableSchema.SetVersion(Version); - tableSchema.SetCompositeMarks(CompositeMarksFlag); + if (!Indexes.ApplyUpdate(*this, schemaUpdate.GetIndexes(), errors, NextColumnId)) { + return false; + } - Y_ABORT_UNLESS(HasEngine()); - tableSchema.SetEngine(GetEngineUnsafe()); + if (!Statistics.ApplyUpdate(*this, schemaUpdate.GetStatistics(), errors)) { + return false; + } - Columns.Serialize(tableSchema); - Indexes.Serialize(tableSchema); + if (!Options.ApplyUpdate(schemaUpdate.GetOptions(), errors)) { + return false; } - bool 
TOlapSchema::Validate(const NKikimrSchemeOp::TColumnTableSchema& opSchema, IErrorCollector& errors) const { - if (!Columns.Validate(opSchema, errors)) { + if (!HasEngine()) { + Engine = schemaUpdate.GetEngineDef(NKikimrSchemeOp::COLUMN_ENGINE_REPLACING_TIMESERIES); + } else { + if (schemaUpdate.HasEngine()) { + errors.AddError(NKikimrScheme::StatusSchemeError, "No engine updates supported"); return false; } + } - if (!Indexes.Validate(opSchema, errors)) { - return false; - } + ++Version; + return true; +} - if (opSchema.GetEngine() != Engine) { - errors.AddError("Specified schema engine does not match schema preset"); - return false; - } - return true; +void TOlapSchema::ParseFromLocalDB(const NKikimrSchemeOp::TColumnTableSchema& tableSchema) { + NextColumnId = tableSchema.GetNextColumnId(); + Version = tableSchema.GetVersion(); + Y_ABORT_UNLESS(tableSchema.HasEngine()); + Engine = tableSchema.GetEngine(); + CompositeMarksFlag = tableSchema.GetCompositeMarks(); + + Columns.Parse(tableSchema); + Indexes.Parse(tableSchema); + Options.Parse(tableSchema); + Statistics.Parse(tableSchema); +} + +void TOlapSchema::Serialize(NKikimrSchemeOp::TColumnTableSchema& tableSchema) const { + tableSchema.SetNextColumnId(NextColumnId); + tableSchema.SetVersion(Version); + tableSchema.SetCompositeMarks(CompositeMarksFlag); + + Y_ABORT_UNLESS(HasEngine()); + tableSchema.SetEngine(GetEngineUnsafe()); + + Columns.Serialize(tableSchema); + Indexes.Serialize(tableSchema); + Options.Serialize(tableSchema); + Statistics.Serialize(tableSchema); +} + +bool TOlapSchema::Validate(const NKikimrSchemeOp::TColumnTableSchema& opSchema, IErrorCollector& errors) const { + if (!Columns.Validate(opSchema, errors)) { + return false; } - void TOlapStoreSchemaPreset::ParseFromLocalDB(const NKikimrSchemeOp::TColumnTableSchemaPreset& presetProto) { - Y_ABORT_UNLESS(presetProto.HasId()); - Y_ABORT_UNLESS(presetProto.HasName()); - Y_ABORT_UNLESS(presetProto.HasSchema()); - Id = presetProto.GetId(); - Name = 
presetProto.GetName(); - TOlapSchema::ParseFromLocalDB(presetProto.GetSchema()); + if (!Indexes.Validate(opSchema, errors)) { + return false; } - void TOlapStoreSchemaPreset::Serialize(NKikimrSchemeOp::TColumnTableSchemaPreset& presetProto) const { - presetProto.SetId(Id); - presetProto.SetName(Name); - TOlapSchema::Serialize(*presetProto.MutableSchema()); + if (!Options.Validate(opSchema, errors)) { + return false; } - bool TOlapStoreSchemaPreset::ParseFromRequest(const NKikimrSchemeOp::TColumnTableSchemaPreset& presetProto, IErrorCollector& errors) { - if (presetProto.HasId()) { - errors.AddError("Schema preset id cannot be specified explicitly"); - return false; - } - if (!presetProto.GetName()) { - errors.AddError("Schema preset name cannot be empty"); - return false; - } - Name = presetProto.GetName(); - return true; + if (!Statistics.Validate(opSchema, errors)) { + return false; + } + + if (opSchema.GetEngine() != Engine) { + errors.AddError("Specified schema engine does not match schema preset"); + return false; } + return true; +} + +void TOlapStoreSchemaPreset::ParseFromLocalDB(const NKikimrSchemeOp::TColumnTableSchemaPreset& presetProto) { + Y_ABORT_UNLESS(presetProto.HasId()); + Y_ABORT_UNLESS(presetProto.HasName()); + Y_ABORT_UNLESS(presetProto.HasSchema()); + Id = presetProto.GetId(); + Name = presetProto.GetName(); + TOlapSchema::ParseFromLocalDB(presetProto.GetSchema()); +} + +void TOlapStoreSchemaPreset::Serialize(NKikimrSchemeOp::TColumnTableSchemaPreset& presetProto) const { + presetProto.SetId(Id); + presetProto.SetName(Name); + TOlapSchema::Serialize(*presetProto.MutableSchema()); +} + +bool TOlapStoreSchemaPreset::ParseFromRequest(const NKikimrSchemeOp::TColumnTableSchemaPreset& presetProto, IErrorCollector& errors) { + if (presetProto.HasId()) { + errors.AddError("Schema preset id cannot be specified explicitly"); + return false; + } + if (!presetProto.GetName()) { + errors.AddError("Schema preset name cannot be empty"); + return false; + } + 
Name = presetProto.GetName(); + return true; +} } diff --git a/ydb/core/tx/schemeshard/olap/schema/schema.h b/ydb/core/tx/schemeshard/olap/schema/schema.h index ca9e8b14c64b..5cedea25de31 100644 --- a/ydb/core/tx/schemeshard/olap/schema/schema.h +++ b/ydb/core/tx/schemeshard/olap/schema/schema.h @@ -1,8 +1,11 @@ #pragma once #include #include +#include #include #include +#include +#include #include "update.h" namespace NKikimr::NSchemeShard { @@ -12,12 +15,18 @@ namespace NKikimr::NSchemeShard { YDB_READONLY_OPT(NKikimrSchemeOp::EColumnTableEngine, Engine); YDB_READONLY_DEF(TOlapColumnsDescription, Columns); YDB_READONLY_DEF(TOlapIndexesDescription, Indexes); + YDB_READONLY_DEF(TOlapOptionsDescription, Options); + mutable TOlapStatisticsDescription Statistics; YDB_READONLY(ui32, NextColumnId, 1); YDB_READONLY(ui32, Version, 0); YDB_READONLY_FLAG(CompositeMarks, true); public: + const TOlapStatisticsDescription& GetStatistics() const { + return Statistics; + } + bool Update(const TOlapSchemaUpdate& schemaUpdate, IErrorCollector& errors); void ParseFromLocalDB(const NKikimrSchemeOp::TColumnTableSchema& tableSchema); @@ -26,7 +35,7 @@ namespace NKikimr::NSchemeShard { bool ValidateTtlSettings(const NKikimrSchemeOp::TColumnDataLifeCycle& ttlSettings, IErrorCollector& errors) const; }; - class TOlapStoreSchemaPreset : public TOlapSchema { + class TOlapStoreSchemaPreset: public TOlapSchema { private: using TBase = TOlapSchema; YDB_ACCESSOR_DEF(TString, Name); diff --git a/ydb/core/tx/schemeshard/olap/schema/update.cpp b/ydb/core/tx/schemeshard/olap/schema/update.cpp index 92d7d9038fb8..b78161394b78 100644 --- a/ydb/core/tx/schemeshard/olap/schema/update.cpp +++ b/ydb/core/tx/schemeshard/olap/schema/update.cpp @@ -23,6 +23,14 @@ namespace NKikimr::NSchemeShard { return false; } + if (!Statistics.Parse(alterRequest, errors)) { + return false; + } + + if (!Options.Parse(alterRequest, errors)) { + return false; + } + return true; } } diff --git 
a/ydb/core/tx/schemeshard/olap/schema/update.h b/ydb/core/tx/schemeshard/olap/schema/update.h index bb4173433f30..d61b97749a5d 100644 --- a/ydb/core/tx/schemeshard/olap/schema/update.h +++ b/ydb/core/tx/schemeshard/olap/schema/update.h @@ -1,10 +1,17 @@ #pragma once +#include +#include +#include +#include +#include namespace NKikimr::NSchemeShard { class TOlapSchemaUpdate { YDB_READONLY_DEF(TOlapColumnsUpdate, Columns); YDB_READONLY_DEF(TOlapIndexesUpdate, Indexes); + YDB_READONLY_DEF(TOlapOptionsUpdate, Options); + YDB_READONLY_DEF(TOlapStatisticsModification, Statistics); YDB_READONLY_OPT(NKikimrSchemeOp::EColumnTableEngine, Engine); public: bool Parse(const NKikimrSchemeOp::TColumnTableSchema& tableSchema, IErrorCollector& errors, bool allowNullKeys = false); diff --git a/ydb/core/tx/schemeshard/olap/schema/ya.make b/ydb/core/tx/schemeshard/olap/schema/ya.make index 9c21a43bbbb7..76b2d2d1c801 100644 --- a/ydb/core/tx/schemeshard/olap/schema/ya.make +++ b/ydb/core/tx/schemeshard/olap/schema/ya.make @@ -8,6 +8,8 @@ SRCS( PEERDIR( ydb/core/tx/schemeshard/olap/columns ydb/core/tx/schemeshard/olap/indexes + ydb/core/tx/schemeshard/olap/options + ydb/core/tx/schemeshard/common ) END() diff --git a/ydb/core/tx/schemeshard/olap/statistics/schema.cpp b/ydb/core/tx/schemeshard/olap/statistics/schema.cpp new file mode 100644 index 000000000000..af6f9e711d05 --- /dev/null +++ b/ydb/core/tx/schemeshard/olap/statistics/schema.cpp @@ -0,0 +1,92 @@ +#include "schema.h" +#include + +namespace NKikimr::NSchemeShard { + +void TOlapStatisticsSchema::SerializeToProto(NKikimrColumnShardStatisticsProto::TOperatorContainer& proto) const { + Operator.SerializeToProto(proto); +} + +bool TOlapStatisticsSchema::DeserializeFromProto(const NKikimrColumnShardStatisticsProto::TOperatorContainer& proto) { + AFL_VERIFY(Operator.DeserializeFromProto(proto))("incorrect_proto", proto.DebugString()); + return true; +} + +bool TOlapStatisticsSchema::ApplyUpdate(const TOlapSchema& /*currentSchema*/, 
const TOlapStatisticsUpsert& upsert, IErrorCollector& errors) { + AFL_VERIFY(upsert.GetName() == Operator.GetName()); + AFL_VERIFY(!!upsert.GetConstructor()); + if (upsert.GetConstructor().GetClassName() != Operator.GetClassName()) { + errors.AddError("different index classes: " + upsert.GetConstructor().GetClassName() + " vs " + Operator.GetClassName()); + return false; + } + errors.AddError("cannot modify statistics calculation for " + Operator.GetName() + ". not implemented currently."); + return false; +} + +bool TOlapStatisticsDescription::ApplyUpdate(const TOlapSchema& currentSchema, const TOlapStatisticsModification& schemaUpdate, IErrorCollector& errors) { + for (auto&& stat : schemaUpdate.GetUpsert()) { + auto* current = MutableByNameOptional(stat.GetName()); + if (current) { + if (!current->ApplyUpdate(currentSchema, stat, errors)) { + return false; + } + } else { + auto meta = stat.GetConstructor()->CreateOperator(stat.GetName(), currentSchema); + if (!meta) { + errors.AddError(meta.GetErrorMessage()); + return false; + } + TOlapStatisticsSchema object(meta.DetachResult()); + Y_ABORT_UNLESS(ObjectsByName.emplace(stat.GetName(), std::move(object)).second); + } + } + + for (const auto& name : schemaUpdate.GetDrop()) { + auto info = GetByNameOptional(name); + if (!info) { + errors.AddError(NKikimrScheme::StatusSchemeError, TStringBuilder() << "Unknown stat for drop: " << name); + return false; + } + AFL_VERIFY(ObjectsByName.erase(name)); + } + + return true; +} + +void TOlapStatisticsDescription::Parse(const NKikimrSchemeOp::TColumnTableSchema& tableSchema) { + for (const auto& proto : tableSchema.GetStatistics()) { + TOlapStatisticsSchema object; + AFL_VERIFY(object.DeserializeFromProto(proto)); + AFL_VERIFY(ObjectsByName.emplace(proto.GetName(), std::move(object)).second); + } +} + +void TOlapStatisticsDescription::Serialize(NKikimrSchemeOp::TColumnTableSchema& tableSchema) const { + for (const auto& object : ObjectsByName) { + 
object.second.SerializeToProto(*tableSchema.AddStatistics()); + } +} + +bool TOlapStatisticsDescription::Validate(const NKikimrSchemeOp::TColumnTableSchema& opSchema, IErrorCollector& errors) const { + THashSet usedObjects; + for (const auto& proto : opSchema.GetStatistics()) { + if (proto.GetName().empty()) { + errors.AddError("Statistic cannot have an empty name"); + return false; + } + + const TString& name = proto.GetName(); + if (!GetByNameOptional(name)) { + errors.AddError("Stat '" + name + "' does not match schema preset"); + return false; + } + + if (!usedObjects.emplace(proto.GetName()).second) { + errors.AddError("Column '" + name + "' is specified multiple times"); + return false; + } + } + return true; +} + +} diff --git a/ydb/core/tx/schemeshard/olap/statistics/schema.h b/ydb/core/tx/schemeshard/olap/statistics/schema.h new file mode 100644 index 000000000000..37a79fc17fdd --- /dev/null +++ b/ydb/core/tx/schemeshard/olap/statistics/schema.h @@ -0,0 +1,80 @@ +#pragma once +#include "update.h" + +namespace NKikimr::NSchemeShard { + +class TOlapSchema; + +class TOlapStatisticsSchema { +private: + YDB_READONLY_DEF(NOlap::NStatistics::TOperatorContainer, Operator); +public: + TOlapStatisticsSchema() = default; + + TOlapStatisticsSchema(const NOlap::NStatistics::TOperatorContainer& container) + : Operator(container) + { + AFL_VERIFY(container.GetName()); + } + + bool ApplyUpdate(const TOlapSchema& currentSchema, const TOlapStatisticsUpsert& upsert, IErrorCollector& errors); + + void SerializeToProto(NKikimrColumnShardStatisticsProto::TOperatorContainer& proto) const; + bool DeserializeFromProto(const NKikimrColumnShardStatisticsProto::TOperatorContainer& proto); +}; + +class TOlapStatisticsDescription { +public: + using TObjectsByName = THashMap; + +private: + YDB_READONLY_DEF(TObjectsByName, ObjectsByName); +public: + const TOlapStatisticsSchema* GetByIdOptional(const NOlap::NStatistics::EType type, const std::vector& entityIds) const noexcept { + for 
(auto&& i : ObjectsByName) { + if (!i.second.GetOperator()) { + continue; + } + if (i.second.GetOperator()->GetIdentifier() != NOlap::NStatistics::TIdentifier(type, entityIds)) { + continue; + } + return &i.second; + } + return nullptr; + } + + const TOlapStatisticsSchema* GetByNameOptional(const TString& name) const noexcept { + auto it = ObjectsByName.find(name); + if (it != ObjectsByName.end()) { + return &it->second; + } + return nullptr; + } + + const TOlapStatisticsSchema& GetByNameVerified(const TString& name) const noexcept { + auto object = GetByNameOptional(name); + AFL_VERIFY(object); + return *object; + } + + TOlapStatisticsSchema* MutableByNameOptional(const TString& name) noexcept { + auto it = ObjectsByName.find(name); + if (it != ObjectsByName.end()) { + return &it->second; + } + return nullptr; + } + + TOlapStatisticsSchema& MutableByNameVerified(const TString& name) noexcept { + auto* object = MutableByNameOptional(name); + AFL_VERIFY(object); + return *object; + } + + bool ApplyUpdate(const TOlapSchema& currentSchema, const TOlapStatisticsModification& schemaUpdate, IErrorCollector& errors); + + void Parse(const NKikimrSchemeOp::TColumnTableSchema& tableSchema); + void Serialize(NKikimrSchemeOp::TColumnTableSchema& tableSchema) const; + bool Validate(const NKikimrSchemeOp::TColumnTableSchema& opSchema, IErrorCollector& errors) const; +}; +} diff --git a/ydb/core/tx/schemeshard/olap/statistics/update.cpp b/ydb/core/tx/schemeshard/olap/statistics/update.cpp new file mode 100644 index 000000000000..1c82c07c300c --- /dev/null +++ b/ydb/core/tx/schemeshard/olap/statistics/update.cpp @@ -0,0 +1,35 @@ +#include "update.h" + +namespace NKikimr::NSchemeShard { + +void TOlapStatisticsUpsert::SerializeToProto(NKikimrColumnShardStatisticsProto::TConstructorContainer& requestedProto) const { + requestedProto.SetName(Name); + Constructor.SerializeToProto(requestedProto); +} + +bool TOlapStatisticsUpsert::DeserializeFromProto(const 
NKikimrColumnShardStatisticsProto::TConstructorContainer& proto) { + Name = proto.GetName(); + AFL_VERIFY(Constructor.DeserializeFromProto(proto))("incorrect_proto", proto.DebugString()); + return true; +} + +bool TOlapStatisticsModification::Parse(const NKikimrSchemeOp::TAlterColumnTableSchema& alterRequest, IErrorCollector& errors) { + for (const auto& name : alterRequest.GetDropStatistics()) { + if (!Drop.emplace(name).second) { + errors.AddError(NKikimrScheme::StatusInvalidParameter, "Duplicated statistics for drop"); + return false; + } + } + TSet upsertNames; + for (auto& schema : alterRequest.GetUpsertStatistics()) { + TOlapStatisticsUpsert stat; + AFL_VERIFY(stat.DeserializeFromProto(schema)); + if (!upsertNames.emplace(stat.GetName()).second) { + errors.AddError(NKikimrScheme::StatusAlreadyExists, TStringBuilder() << "stat '" << stat.GetName() << "' duplication for add"); + return false; + } + Upsert.emplace_back(std::move(stat)); + } + return true; +} +} diff --git a/ydb/core/tx/schemeshard/olap/statistics/update.h b/ydb/core/tx/schemeshard/olap/statistics/update.h new file mode 100644 index 000000000000..96558928acf3 --- /dev/null +++ b/ydb/core/tx/schemeshard/olap/statistics/update.h @@ -0,0 +1,43 @@ +#pragma once +#include +#include +#include +#include +#include + +namespace NKikimr::NSchemeShard { + + class TOlapStatisticsUpsert { + private: + YDB_READONLY_DEF(TString, Name); + protected: + NOlap::NStatistics::TConstructorContainer Constructor; + public: + TOlapStatisticsUpsert() = default; + TOlapStatisticsUpsert(const TString& name, const NOlap::NStatistics::TConstructorContainer& constructor) + : Name(name) + , Constructor(constructor) + { + + } + + const NOlap::NStatistics::TConstructorContainer& GetConstructor() const { + return Constructor; + } + + bool DeserializeFromProto(const NKikimrColumnShardStatisticsProto::TConstructorContainer& requestedProto); + void SerializeToProto(NKikimrColumnShardStatisticsProto::TConstructorContainer& 
requestedProto) const; + }; + + class TOlapStatisticsModification { + private: + YDB_READONLY_DEF(TVector, Upsert); + YDB_READONLY_DEF(TSet, Drop); + public: + void AddUpsert(const TString& name, const NOlap::NStatistics::TConstructorContainer container) { + Upsert.emplace_back(TOlapStatisticsUpsert(name, container)); + } + + bool Parse(const NKikimrSchemeOp::TAlterColumnTableSchema& alterRequest, IErrorCollector& errors); + }; +} diff --git a/ydb/core/tx/schemeshard/olap/statistics/ya.make b/ydb/core/tx/schemeshard/olap/statistics/ya.make new file mode 100644 index 000000000000..0303a9692f52 --- /dev/null +++ b/ydb/core/tx/schemeshard/olap/statistics/ya.make @@ -0,0 +1,12 @@ +LIBRARY() + +SRCS( + schema.cpp + update.cpp +) + +PEERDIR( + ydb/services/bg_tasks/abstract +) + +END() diff --git a/ydb/core/tx/schemeshard/olap/ya.make b/ydb/core/tx/schemeshard/olap/ya.make index 63e509c2630f..224d54e7e4cd 100644 --- a/ydb/core/tx/schemeshard/olap/ya.make +++ b/ydb/core/tx/schemeshard/olap/ya.make @@ -6,6 +6,8 @@ PEERDIR( ydb/core/tx/schemeshard/olap/schema ydb/core/tx/schemeshard/olap/common ydb/core/tx/schemeshard/olap/operations + ydb/core/tx/schemeshard/olap/statistics + ydb/core/tx/schemeshard/olap/options ) END() diff --git a/ydb/core/tx/schemeshard/schemeshard_info_types.cpp b/ydb/core/tx/schemeshard/schemeshard_info_types.cpp index fb0fdc3c3ba7..c138d4c781ed 100644 --- a/ydb/core/tx/schemeshard/schemeshard_info_types.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_info_types.cpp @@ -2252,7 +2252,7 @@ bool TOlapStoreInfo::ParseFromRequest(const NKikimrSchemeOp::TColumnStoreDescrip preset.SetProtoIndex(protoIndex++); TOlapSchemaUpdate schemaDiff; - if (!schemaDiff.Parse(presetProto.GetSchema(), errors, true)) { + if (!schemaDiff.Parse(presetProto.GetSchema(), errors)) { return false; } diff --git a/ydb/core/tx/schemeshard/schemeshard_validate_ttl.cpp b/ydb/core/tx/schemeshard/schemeshard_validate_ttl.cpp index 40a792d0f484..cc447f07015b 100644 --- 
a/ydb/core/tx/schemeshard/schemeshard_validate_ttl.cpp +++ b/ydb/core/tx/schemeshard/schemeshard_validate_ttl.cpp @@ -1,60 +1,23 @@ #include "schemeshard_info_types.h" + +#include "common/validation.h" #include "olap/columns/schema.h" + #include namespace NKikimr { namespace NSchemeShard { -// Helper accessors for OLTP and OLAP tables that use different TColumn's namespace { - inline - bool IsDropped(const TOlapColumnsDescription::TColumn& col) { - Y_UNUSED(col); - return false; - } - - inline - ui32 GetType(const TOlapColumnsDescription::TColumn& col) { - Y_ABORT_UNLESS(col.GetType().GetTypeId() != NScheme::NTypeIds::Pg, "pg types are not supported"); - return col.GetType().GetTypeId(); - } - - inline - bool IsDropped(const TTableInfo::TColumn& col) { - return col.IsDropped(); - } +static inline bool IsDropped(const TTableInfo::TColumn& col) { + return col.IsDropped(); +} - inline - ui32 GetType(const TTableInfo::TColumn& col) { - Y_ABORT_UNLESS(col.PType.GetTypeId() != NScheme::NTypeIds::Pg, "pg types are not supported"); - return col.PType.GetTypeId(); - } +static inline ui32 GetType(const TTableInfo::TColumn& col) { + Y_ABORT_UNLESS(col.PType.GetTypeId() != NScheme::NTypeIds::Pg, "pg types are not supported"); + return col.PType.GetTypeId(); } -template -bool ValidateUnit(const TColumn& column, NKikimrSchemeOp::TTTLSettings::EUnit unit, TString& errStr) { - switch (GetType(column)) { - case NScheme::NTypeIds::Date: - case NScheme::NTypeIds::Datetime: - case NScheme::NTypeIds::Timestamp: - if (unit != NKikimrSchemeOp::TTTLSettings::UNIT_AUTO) { - errStr = "To enable TTL on date type column 'DateTypeColumnModeSettings' should be specified"; - return false; - } - break; - case NScheme::NTypeIds::Uint32: - case NScheme::NTypeIds::Uint64: - case NScheme::NTypeIds::DyNumber: - if (unit == NKikimrSchemeOp::TTTLSettings::UNIT_AUTO) { - errStr = "To enable TTL on integral type column 'ValueSinceUnixEpochModeSettings' should be specified"; - return false; - } - break; - 
default: - errStr = "Unsupported column type"; - return false; - } - return true; } bool ValidateTtlSettings(const NKikimrSchemeOp::TTTLSettings& ttl, @@ -92,7 +55,7 @@ bool ValidateTtlSettings(const NKikimrSchemeOp::TTTLSettings& ttl, } const auto unit = enabled.GetColumnUnit(); - if (!ValidateUnit(*column, unit, errStr)) { + if (!NValidation::TTTLValidator::ValidateUnit(GetType(*column), unit, errStr)) { return false; } @@ -117,75 +80,4 @@ bool ValidateTtlSettings(const NKikimrSchemeOp::TTTLSettings& ttl, return true; } -static bool ValidateColumnTableTtl(const NKikimrSchemeOp::TColumnDataLifeCycle::TTtl& ttl, - const THashMap& sourceColumns, - const THashMap& alterColumns, - const THashMap& colName2Id, - IErrorCollector& errors) -{ - const TString colName = ttl.GetColumnName(); - - auto it = colName2Id.find(colName); - if (it == colName2Id.end()) { - errors.AddError(Sprintf("Cannot enable TTL on unknown column: '%s'", colName.data())); - return false; - } - - const TOlapColumnsDescription::TColumn* column = nullptr; - const ui32 colId = it->second; - if (alterColumns.contains(colId)) { - column = &alterColumns.at(colId); - } else if (sourceColumns.contains(colId)) { - column = &sourceColumns.at(colId); - } else { - Y_ABORT_UNLESS("Unknown column"); - } - - if (IsDropped(*column)) { - errors.AddError(Sprintf("Cannot enable TTL on dropped column: '%s'", colName.data())); - return false; - } - - if (ttl.HasExpireAfterBytes()) { - errors.AddError("TTL with eviction by size is not supported yet"); - return false; - } - - if (!ttl.HasExpireAfterSeconds()) { - errors.AddError("TTL without eviction time"); - return false; - } - - auto unit = ttl.GetColumnUnit(); - - switch (GetType(*column)) { - case NScheme::NTypeIds::DyNumber: - errors.AddError("Unsupported column type for TTL in column tables"); - return false; - default: - break; - } - - TString errStr; - if (!ValidateUnit(*column, unit, errStr)) { - errors.AddError(errStr); - return false; - } - return true; -} - 
-bool TOlapSchema::ValidateTtlSettings(const NKikimrSchemeOp::TColumnDataLifeCycle& ttl, IErrorCollector& errors) const { - using TTtlProto = NKikimrSchemeOp::TColumnDataLifeCycle; - - switch (ttl.GetStatusCase()) { - case TTtlProto::kEnabled: - return ValidateColumnTableTtl(ttl.GetEnabled(), {}, Columns.GetColumns(), Columns.GetColumnsByName(), errors); - case TTtlProto::kDisabled: - default: - break; - } - - return true; -} - }} diff --git a/ydb/core/tx/schemeshard/ut_helpers/helpers.cpp b/ydb/core/tx/schemeshard/ut_helpers/helpers.cpp index 16a2468863be..870c43429785 100644 --- a/ydb/core/tx/schemeshard/ut_helpers/helpers.cpp +++ b/ydb/core/tx/schemeshard/ut_helpers/helpers.cpp @@ -2345,7 +2345,7 @@ namespace NSchemeShardUT_Private { TSerializedCellMatrix matrix(cells, 1, 2); auto evWrite = std::make_unique(txId, NKikimrDataEvents::TEvWrite::MODE_IMMEDIATE); - ui64 payloadIndex = NKikimr::NEvWrite::TPayloadHelper(*evWrite).AddDataToPayload(std::move(matrix.ReleaseBuffer())); + ui64 payloadIndex = NKikimr::NEvWrite::TPayloadWriter(*evWrite).AddDataToPayload(std::move(matrix.ReleaseBuffer())); evWrite->AddOperation(NKikimrDataEvents::TEvWrite::TOperation::OPERATION_UPSERT, tableId, columnIds, payloadIndex, NKikimrDataEvents::FORMAT_CELLVEC); ForwardToTablet(runtime, datashardTabletId, sender, evWrite.release()); diff --git a/ydb/core/tx/schemeshard/ut_olap/ut_olap.cpp b/ydb/core/tx/schemeshard/ut_olap/ut_olap.cpp index 06c25b6bdd61..31022e755581 100644 --- a/ydb/core/tx/schemeshard/ut_olap/ut_olap.cpp +++ b/ydb/core/tx/schemeshard/ut_olap/ut_olap.cpp @@ -38,9 +38,9 @@ static const TString defaultTableSchema = R"( } )"; -static const TVector> defaultYdbSchema = { - {"timestamp", TTypeInfo(NTypeIds::Timestamp) }, - {"data", TTypeInfo(NTypeIds::Utf8) } +static const TVector defaultYdbSchema = { + NArrow::NTest::TTestColumn("timestamp", TTypeInfo(NTypeIds::Timestamp) ), + NArrow::NTest::TTestColumn("data", TTypeInfo(NTypeIds::Utf8) ) }; }} @@ -65,7 +65,7 @@ 
Y_UNIT_TEST_SUITE(TOlap) { SchemaPresets { Name: "default" Schema { - Columns { Name: "timestamp" Type: "Timestamp" } + Columns { Name: "timestamp" Type: "Timestamp" NotNull: true } Columns { Name: "data" Type: "Utf8" } KeyColumnNames: "timestamp" Engine: COLUMN_ENGINE_REPLACING_TIMESERIES @@ -87,7 +87,7 @@ Y_UNIT_TEST_SUITE(TOlap) { SchemaPresets { Name: "default" Schema { - Columns { Name: "timestamp" Type: "Timestamp" } + Columns { Name: "timestamp" Type: "Timestamp" NotNull: true } Columns { Name: "data" Type: "Utf8" } KeyColumnNames: "timestamp" Engine: COLUMN_ENGINE_REPLACING_TIMESERIES diff --git a/ydb/core/tx/schemeshard/ut_olap/ya.make b/ydb/core/tx/schemeshard/ut_olap/ya.make index 8bf46e35dc57..231ca8a779b2 100644 --- a/ydb/core/tx/schemeshard/ut_olap/ya.make +++ b/ydb/core/tx/schemeshard/ut_olap/ya.make @@ -21,6 +21,8 @@ PEERDIR( ydb/core/formats ydb/core/tx ydb/core/tx/columnshard + ydb/core/tx/columnshard/test_helper + ydb/core/tx/columnshard/hooks/testing ydb/core/tx/schemeshard/ut_helpers ydb/library/yql/public/udf/service/exception_policy ) diff --git a/ydb/core/tx/schemeshard/ya.make b/ydb/core/tx/schemeshard/ya.make index 1c13a431f4a6..8658486694a8 100644 --- a/ydb/core/tx/schemeshard/ya.make +++ b/ydb/core/tx/schemeshard/ya.make @@ -259,6 +259,7 @@ PEERDIR( ydb/core/tablet_flat ydb/core/tx ydb/core/tx/datashard + ydb/core/tx/schemeshard/common ydb/core/tx/schemeshard/olap ydb/core/tx/scheme_board ydb/core/tx/tx_allocator_client diff --git a/ydb/core/tx/tiering/abstract/manager.cpp b/ydb/core/tx/tiering/abstract/manager.cpp new file mode 100644 index 000000000000..dc6f9c16cd1e --- /dev/null +++ b/ydb/core/tx/tiering/abstract/manager.cpp @@ -0,0 +1,12 @@ +#include "manager.h" +#include + +namespace NKikimr::NColumnShard { + +const NTiers::TManager& ITiersManager::GetManagerVerified(const TString& tierId) const { + auto* result = GetManagerOptional(tierId); + AFL_VERIFY(result)("tier_id", tierId); + return *result; +} + +} diff --git 
a/ydb/core/tx/tiering/abstract/manager.h b/ydb/core/tx/tiering/abstract/manager.h new file mode 100644 index 000000000000..994ecbfa6b4e --- /dev/null +++ b/ydb/core/tx/tiering/abstract/manager.h @@ -0,0 +1,18 @@ +#pragma once +#include +#include + +namespace NKikimr::NColumnShard { +namespace NTiers { +class TManager; +} + +class ITiersManager { +public: + const NTiers::TManager& GetManagerVerified(const TString& tierId) const; + virtual const NTiers::TManager* GetManagerOptional(const TString& tierId) const = 0; + virtual const std::map& GetManagers() const = 0; + virtual ~ITiersManager() = default; +}; + +} diff --git a/ydb/core/tx/tiering/abstract/ya.make b/ydb/core/tx/tiering/abstract/ya.make new file mode 100644 index 000000000000..cd240f0f8cd6 --- /dev/null +++ b/ydb/core/tx/tiering/abstract/ya.make @@ -0,0 +1,11 @@ +LIBRARY() + +SRCS( + manager.cpp +) + +PEERDIR( + ydb/library/actors/core +) + +END() diff --git a/ydb/core/tx/tiering/manager.cpp b/ydb/core/tx/tiering/manager.cpp index 4af8bfcf4704..85dd6d60c10b 100644 --- a/ydb/core/tx/tiering/manager.cpp +++ b/ydb/core/tx/tiering/manager.cpp @@ -108,7 +108,7 @@ TManager::TManager(const ui64 tabletId, const NActors::TActorId& tabletActorId, NArrow::NSerialization::TSerializerContainer ConvertCompression(const NKikimrSchemeOp::TCompressionOptions& compressionProto) { NArrow::NSerialization::TSerializerContainer container; - AFL_VERIFY(container.DeserializeFromProto(compressionProto)); + container.DeserializeFromProto(compressionProto).Validate(); return container; } @@ -176,12 +176,6 @@ TTiersManager& TTiersManager::Stop(const bool needStopActor) { return *this; } -const NTiers::TManager& TTiersManager::GetManagerVerified(const TString& tierId) const { - auto it = Managers.find(tierId); - Y_ABORT_UNLESS(it != Managers.end()); - return it->second; -} - const NTiers::TManager* TTiersManager::GetManagerOptional(const TString& tierId) const { auto it = Managers.find(tierId); if (it != Managers.end()) { @@ -200,24 
+194,26 @@ NMetadata::NFetcher::ISnapshotsFetcher::TPtr TTiersManager::GetExternalDataManip THashMap TTiersManager::GetTiering() const { THashMap result; - if (!IsReady()) { - return result; - } + AFL_VERIFY(IsReady()); auto snapshotPtr = std::dynamic_pointer_cast(Snapshot); Y_ABORT_UNLESS(snapshotPtr); auto& tierConfigs = snapshotPtr->GetTierConfigs(); for (auto&& i : PathIdTiering) { auto* tiering = snapshotPtr->GetTieringById(i.second); if (tiering) { + AFL_DEBUG(NKikimrServices::TX_COLUMNSHARD)("path_id", i.first)("tiering_name", i.second)("event", "activation"); result.emplace(i.first, tiering->BuildOlapTiers()); for (auto& [pathId, pathTiering] : result) { for (auto& [name, tier] : pathTiering.GetTierByName()) { + AFL_VERIFY(name != NOlap::NTiering::NCommon::DeleteTierName); auto it = tierConfigs.find(name); if (it != tierConfigs.end()) { tier->SetSerializer(NTiers::ConvertCompression(it->second.GetCompression())); } } } + } else { + AFL_ERROR(NKikimrServices::TX_COLUMNSHARD)("path_id", i.first)("tiering_name", i.second)("event", "not_found"); } } return result; diff --git a/ydb/core/tx/tiering/manager.h b/ydb/core/tx/tiering/manager.h index dd2c6767c87f..147ee27f54f8 100644 --- a/ydb/core/tx/tiering/manager.h +++ b/ydb/core/tx/tiering/manager.h @@ -1,7 +1,7 @@ #pragma once #include "external_data.h" -#include +#include "abstract/manager.h" #include #include @@ -12,6 +12,8 @@ #include +#include + namespace NKikimr::NColumnShard { namespace NTiers { @@ -43,16 +45,16 @@ class TManager { }; } -class TTiersManager { +class TTiersManager: public ITiersManager { private: class TActor; - using TManagers = std::unordered_map; + using TManagers = std::map; ui64 TabletId = 0; const TActorId TabletActorId; std::function ShardCallback; TActor* Actor = nullptr; std::unordered_map PathIdTiering; - YDB_READONLY_DEF(TManagers, Managers); + TManagers Managers; std::shared_ptr Secrets; NMetadata::NFetcher::ISnapshot::TPtr Snapshot; @@ -82,17 +84,13 @@ class TTiersManager { 
TTiersManager& Start(std::shared_ptr ownerPtr); TTiersManager& Stop(const bool needStopActor); - const NTiers::TManager& GetManagerVerified(const TString& tierId) const; - const NTiers::TManager* GetManagerOptional(const TString& tierId) const; - NMetadata::NFetcher::ISnapshotsFetcher::TPtr GetExternalDataManipulation() const; - - TManagers::const_iterator begin() const { - return Managers.begin(); + virtual const std::map& GetManagers() const override { + AFL_VERIFY(IsReady()); + return Managers; } + virtual const NTiers::TManager* GetManagerOptional(const TString& tierId) const override; + NMetadata::NFetcher::ISnapshotsFetcher::TPtr GetExternalDataManipulation() const; - TManagers::const_iterator end() const { - return Managers.end(); - } }; } diff --git a/ydb/core/tx/tiering/rule/manager.cpp b/ydb/core/tx/tiering/rule/manager.cpp index bdcfac875c64..c6ea9e9f6130 100644 --- a/ydb/core/tx/tiering/rule/manager.cpp +++ b/ydb/core/tx/tiering/rule/manager.cpp @@ -15,6 +15,9 @@ NMetadata::NModifications::TOperationParsingResult TTieringRulesManager::DoBuild TInternalModificationContext& /*context*/) const { NMetadata::NInternal::TTableRecord result; result.SetColumn(TTieringRule::TDecoder::TieringRuleId, NMetadata::NInternal::TYDBValue::Utf8(settings.GetObjectId())); + if (settings.GetObjectId().StartsWith("$") || settings.GetObjectId().StartsWith("_")) { + return TConclusionStatus::Fail("tiering rule cannot start with '$', '_' characters"); + } { auto fValue = settings.GetFeaturesExtractor().Extract(TTieringRule::TDecoder::DefaultColumn); if (fValue) { diff --git a/ydb/core/tx/tiering/rule/object.cpp b/ydb/core/tx/tiering/rule/object.cpp index 64aa4ba58673..59d42bdb4c8e 100644 --- a/ydb/core/tx/tiering/rule/object.cpp +++ b/ydb/core/tx/tiering/rule/object.cpp @@ -25,7 +25,7 @@ NJson::TJsonValue TTieringRule::SerializeDescriptionToJson() const { return result; } -bool TTieringRule::DeserializeDescriptionFromJson(const NJson::TJsonValue & jsonInfo) { +bool 
TTieringRule::DeserializeDescriptionFromJson(const NJson::TJsonValue& jsonInfo) { const NJson::TJsonValue::TArray* rules; if (!jsonInfo["rules"].GetArrayPointer(&rules)) { return false; @@ -74,7 +74,7 @@ bool TTieringRule::DeserializeFromRecord(const TDecoder& decoder, const Ydb::Val NKikimr::NOlap::TTiering TTieringRule::BuildOlapTiers() const { NOlap::TTiering result; for (auto&& r : Intervals) { - result.Add(std::make_shared(r.GetTierName(), r.GetDurationForEvict(), GetDefaultColumn())); + AFL_VERIFY(result.Add(std::make_shared(r.GetTierName(), r.GetDurationForEvict(), GetDefaultColumn()))); } return result; } diff --git a/ydb/core/tx/tiering/tier/manager.cpp b/ydb/core/tx/tiering/tier/manager.cpp index d9e9834f974f..a64d2a7603ab 100644 --- a/ydb/core/tx/tiering/tier/manager.cpp +++ b/ydb/core/tx/tiering/tier/manager.cpp @@ -10,6 +10,9 @@ NMetadata::NModifications::TOperationParsingResult TTiersManager::DoBuildPatchFr { NMetadata::NInternal::TTableRecord result; result.SetColumn(TTierConfig::TDecoder::TierName, NMetadata::NInternal::TYDBValue::Utf8(settings.GetObjectId())); + if (settings.GetObjectId().StartsWith("$") || settings.GetObjectId().StartsWith("_")) { + return TConclusionStatus::Fail("tier name cannot start with '$', '_' characters"); + } { auto fConfig = settings.GetFeaturesExtractor().Extract(TTierConfig::TDecoder::TierConfig); if (fConfig) { diff --git a/ydb/core/tx/tiering/ut/ut_tiers.cpp b/ydb/core/tx/tiering/ut/ut_tiers.cpp index a398f00edd0a..252ab7e3fd66 100644 --- a/ydb/core/tx/tiering/ut/ut_tiers.cpp +++ b/ydb/core/tx/tiering/ut/ut_tiers.cpp @@ -28,6 +28,18 @@ using namespace NColumnShard; class TFastTTLCompactionController: public NKikimr::NYDBTest::ICSController { public: + virtual bool NeedForceCompactionBacketsConstruction() const override { + return true; + } + virtual ui64 GetSmallPortionSizeDetector(const ui64 /*def*/) const override { + return 0; + } + virtual TDuration GetOptimizerFreshnessCheckDuration(const TDuration 
/*defaultValue*/) const override { + return TDuration::Zero(); + } + virtual TDuration GetLagForCompactionBeforeTierings(const TDuration /*def*/) const override { + return TDuration::Zero(); + } virtual TDuration GetTTLDefaultWaitingDuration(const TDuration /*defaultValue*/) const override { return TDuration::Seconds(1); } @@ -548,7 +560,7 @@ Y_UNIT_TEST_SUITE(ColumnShardTiers) { Tests::TServer::TPtr server = new Tests::TServer(serverSettings); server->EnableGRpc(grpcPort); Tests::TClient client(serverSettings); - Tests::NCommon::TLoggerInit(server->GetRuntime()).SetComponents({ NKikimrServices::TX_COLUMNSHARD }).Initialize(); + Tests::NCommon::TLoggerInit(server->GetRuntime()).Clear().SetComponents({ NKikimrServices::TX_COLUMNSHARD }, "CS").Initialize(); auto& runtime = *server->GetRuntime(); // runtime.SetLogPriority(NKikimrServices::TX_PROXY, NLog::PRI_TRACE); @@ -563,11 +575,12 @@ Y_UNIT_TEST_SUITE(ColumnShardTiers) { // runtime.SetLogPriority(NKikimrServices::TX_PROXY_SCHEME_CACHE, NLog::PRI_DEBUG); TLocalHelper lHelper(*server); + lHelper.SetOptionalStorageId("__DEFAULT"); lHelper.StartSchemaRequest("CREATE OBJECT secretAccessKey ( " "TYPE SECRET) WITH (value = ak)"); lHelper.StartSchemaRequest("CREATE OBJECT secretSecretKey ( " - "TYPE SECRET) WITH (value = sk)"); - Singleton()->SetSecretKey("sk"); + "TYPE SECRET) WITH (value = fakeSecret)"); + Singleton()->SetSecretKey("fakeSecret"); lHelper.StartSchemaRequest("CREATE OBJECT tier1 ( " "TYPE TIER) WITH (tierConfig = `" + TierConfigProtoStr + "`)"); @@ -626,9 +639,7 @@ Y_UNIT_TEST_SUITE(ColumnShardTiers) { } UNIT_ASSERT(check); } -#ifdef S3_TEST_USAGE Cerr << "storage initialized..." 
<< Endl; -#endif /* lHelper.DropTable("/Root/olapStore/olapTable"); lHelper.StartDataRequest("DELETE FROM `/Root/olapStore/olapTable`"); diff --git a/ydb/core/tx/tx_proxy/upload_rows_common_impl.cpp b/ydb/core/tx/tx_proxy/upload_rows_common_impl.cpp index 865bade5eb22..6d487a26016b 100644 --- a/ydb/core/tx/tx_proxy/upload_rows_common_impl.cpp +++ b/ydb/core/tx/tx_proxy/upload_rows_common_impl.cpp @@ -7,10 +7,24 @@ namespace NKikimr { : TBase("BulkUpsert") { RequestsCount = TBase::GetDeriviative("Requests/Count"); - RepliesCount = TBase::GetDeriviative("Replies/Count"); ReplyDuration = TBase::GetHistogram("Replies/Duration", NMonitoring::ExponentialHistogram(15, 2, 1)); RowsCount = TBase::GetDeriviative("Rows/Count"); PackageSize = TBase::GetHistogram("Rows/PackageSize", NMonitoring::ExponentialHistogram(15, 2, 10)); + + const google::protobuf::EnumDescriptor* descriptor = ::Ydb::StatusIds::StatusCode_descriptor(); + for (ui32 i = 0; i < (ui32)descriptor->value_count(); ++i) { + auto vDescription = descriptor->value(i); + CodesCount.emplace(vDescription->name(), CreateSubGroup("reply_code", vDescription->name()).GetDeriviative("Replies/Count")); + } + } + + void TUploadCounters::OnReply(const TDuration d, const ::Ydb::StatusIds::StatusCode code) const { + const TString name = ::Ydb::StatusIds::StatusCode_Name(code); + auto it = CodesCount.find(name); + Y_ABORT_UNLESS(it != CodesCount.end()); + it->second->Add(1); + ReplyDuration->Collect(d.MilliSeconds()); } + } diff --git a/ydb/core/tx/tx_proxy/upload_rows_common_impl.h b/ydb/core/tx/tx_proxy/upload_rows_common_impl.h index 0826905f9069..5b79cb5657d5 100644 --- a/ydb/core/tx/tx_proxy/upload_rows_common_impl.h +++ b/ydb/core/tx/tx_proxy/upload_rows_common_impl.h @@ -41,15 +41,12 @@ class TUploadCounters: public NColumnShard::TCommonCountersOwner { private: using TBase = NColumnShard::TCommonCountersOwner; NMonitoring::TDynamicCounters::TCounterPtr RequestsCount; - NMonitoring::TDynamicCounters::TCounterPtr 
RepliesCount; NMonitoring::THistogramPtr ReplyDuration; NMonitoring::TDynamicCounters::TCounterPtr RowsCount; NMonitoring::THistogramPtr PackageSize; - NMonitoring::TDynamicCounters::TCounterPtr FailsCount; - NMonitoring::THistogramPtr FailDuration; - + THashMap CodesCount; public: TUploadCounters(); @@ -59,15 +56,7 @@ class TUploadCounters: public NColumnShard::TCommonCountersOwner { PackageSize->Collect(rowsCount); } - void OnReply( const TDuration d) const { - RepliesCount->Add(1); - ReplyDuration->Collect(d.MilliSeconds()); - } - - void OnFail(const TDuration d) const { - FailsCount->Add(1); - FailDuration->Collect(d.MilliSeconds()); - } + void OnReply(const TDuration d, const ::Ydb::StatusIds::StatusCode code) const; }; @@ -265,8 +254,9 @@ class TUploadRowsBase : public TActorBootstrappedid() == NArrow::GetArrowType(type2)->id()); + auto arrowType1 = NArrow::GetArrowType(type1); + auto arrowType2 = NArrow::GetArrowType(type2); + if (arrowType1.ok() && arrowType2.ok()) { + res = (arrowType1.ValueUnsafe()->id() == arrowType2.ValueUnsafe()->id()); + } } return res; } @@ -688,15 +682,19 @@ class TUploadRowsBase : public TActorBootstrappedGet()->TabletId)); ShardRepliesLeft.erase(ev->Get()->TabletId); - ReplyIfDone(ctx); + return ReplyIfDone(ctx); } STFUNC(StateWaitResults) { @@ -1213,7 +1211,7 @@ class TUploadRowsBase : public TActorBootstrappedNow() - StartTime); + UploadCounters.OnReply(TAppData::TimeProvider->Now() - StartTime, status); SendResult(ctx, status); LOG_DEBUG_S(ctx, NKikimrServices::RPC_REQUEST, LogPrefix() << "completed with status " << status); diff --git a/ydb/core/wrappers/fake_storage.h b/ydb/core/wrappers/fake_storage.h index 510fb0c2ab4c..c672835c9874 100644 --- a/ydb/core/wrappers/fake_storage.h +++ b/ydb/core/wrappers/fake_storage.h @@ -17,7 +17,22 @@ class TFakeBucketStorage { private: mutable TMutex Mutex; TMap Data; + static inline TAtomicCounter WritesCount; + static inline TAtomicCounter DeletesCount; public: + static i64 
GetWritesCount() { + return WritesCount.Val(); + } + + static i64 GetDeletesCount() { + return DeletesCount.Val(); + } + + static void ResetWriteCounters() { + WritesCount = 0; + DeletesCount = 0; + } + TMap::const_iterator begin() const { return Data.begin(); } @@ -28,6 +43,7 @@ class TFakeBucketStorage { return Data.size(); } void PutObject(const TString& objectId, const TString& data) { + WritesCount.Inc(); TGuard g(Mutex); Data[objectId] = data; } @@ -40,6 +56,8 @@ class TFakeBucketStorage { return it->second; } void Remove(const TString& objectId) { + DeletesCount.Inc(); + TGuard g(Mutex); Data.erase(objectId); } }; @@ -57,6 +75,19 @@ class TFakeExternalStorage { } public: TFakeExternalStorage() = default; + + static i64 GetWritesCount() { + return TFakeBucketStorage::GetWritesCount(); + } + + static i64 GetDeletesCount() { + return TFakeBucketStorage::GetDeletesCount(); + } + + static void ResetWriteCounters() { + return TFakeBucketStorage::ResetWriteCounters(); + } + const TFakeBucketStorage& GetBucket(const TString& bucketId) const { TGuard g(Mutex); auto it = BucketStorages.find(bucketId); @@ -106,12 +137,17 @@ class TFakeExternalStorageOperator: public IExternalStorageOperator { private: const TString Bucket; const TString SecretKey; + std::shared_ptr OwnedStorage; template void ExecuteImpl(TEvent& ev) const { ev->Get()->MutableRequest().WithBucket(Bucket); Y_ABORT_UNLESS(SecretKey == Singleton()->GetSecretKey()); - Singleton()->Execute(ev, ReplyAdapter); + if (OwnedStorage) { + OwnedStorage->Execute(ev, ReplyAdapter); + } else { + Singleton()->Execute(ev, ReplyAdapter); + } } virtual TString DoDebugString() const override { @@ -119,9 +155,10 @@ class TFakeExternalStorageOperator: public IExternalStorageOperator { } public: - TFakeExternalStorageOperator(const TString& bucket, const TString& secretKey) + TFakeExternalStorageOperator(const TString& bucket, const TString& secretKey, const std::shared_ptr storage = {}) : Bucket(bucket) , SecretKey(secretKey) 
+ , OwnedStorage(storage) { } virtual void Execute(TEvCheckObjectExistsRequest::TPtr& ev) const override { diff --git a/ydb/core/wrappers/s3_storage_config.cpp b/ydb/core/wrappers/s3_storage_config.cpp index b3a04794feda..ffc3016e2096 100644 --- a/ydb/core/wrappers/s3_storage_config.cpp +++ b/ydb/core/wrappers/s3_storage_config.cpp @@ -94,6 +94,41 @@ TS3User::~TS3User() { Singleton()->UnRef(); } +class TS3ThreadsPoolByEndpoint { +private: + + class TPool { + public: + std::shared_ptr Executor; + ui32 ThreadsCount = 0; + TPool(const std::shared_ptr& executor, const ui32 threadsCount) + : Executor(executor) + , ThreadsCount(threadsCount) + { + + } + }; + + THashMap Pools; + TMutex Mutex; + std::shared_ptr GetPoolImpl(const TString& endpoint, const ui32 threadsCount) { + TGuard g(Mutex); + auto it = Pools.find(endpoint); + if (it == Pools.end()) { + TPool pool(std::make_shared(threadsCount), threadsCount); + it = Pools.emplace(endpoint, std::move(pool)).first; + } else if (it->second.ThreadsCount < threadsCount) { + TPool pool(std::make_shared(threadsCount), threadsCount); + it->second = std::move(pool); + } + return it->second.Executor; + } +public: + static std::shared_ptr GetPool(const TString& endpoint, const ui32 threadsCount) { + return Singleton()->GetPoolImpl(endpoint, threadsCount); + } +}; + Aws::Client::ClientConfiguration TS3ExternalStorageConfig::ConfigFromSettings(const NKikimrSchemeOp::TS3Settings& settings) { Aws::Client::ClientConfiguration config; @@ -107,10 +142,10 @@ Aws::Client::ClientConfiguration TS3ExternalStorageConfig::ConfigFromSettings(co if (settings.HasHttpRequestTimeoutMs()) { config.httpRequestTimeoutMs = settings.GetHttpRequestTimeoutMs(); } - config.executor = std::make_shared(1); + config.executor = TS3ThreadsPoolByEndpoint::GetPool(settings.GetEndpoint(), settings.GetExecutorThreadsCount()); config.enableTcpKeepAlive = true; // config.lowSpeedLimit = 0; - config.maxConnections = 5; + config.maxConnections = 
settings.HasMaxConnectionsCount() ? settings.GetMaxConnectionsCount() : settings.GetExecutorThreadsCount(); config.caPath = "/etc/ssl/certs"; switch (settings.GetScheme()) { diff --git a/ydb/library/actors/core/actor_bootstrapped.h b/ydb/library/actors/core/actor_bootstrapped.h index 70a6163bc53c..1776be51870b 100644 --- a/ydb/library/actors/core/actor_bootstrapped.h +++ b/ydb/library/actors/core/actor_bootstrapped.h @@ -16,7 +16,7 @@ namespace NActors { } STFUNC(StateBootstrap) { - Y_ABORT_UNLESS(ev->GetTypeRewrite() == TEvents::TSystem::Bootstrap, "Unexpected bootstrap message"); + Y_ABORT_UNLESS(ev->GetTypeRewrite() == TEvents::TSystem::Bootstrap, "Unexpected bootstrap message: %s", ev->GetTypeName().data()); using T = decltype(&TDerived::Bootstrap); TDerived& self = static_cast(*this); if constexpr (std::is_invocable_v) { diff --git a/ydb/library/actors/prof/tag.cpp b/ydb/library/actors/prof/tag.cpp index 45328fb5cbb5..df798f002b90 100644 --- a/ydb/library/actors/prof/tag.cpp +++ b/ydb/library/actors/prof/tag.cpp @@ -121,12 +121,16 @@ namespace NProfiling { TSetThreadAllocTag* SetThreadAllocTag = SetThreadAllocTagFn(); } -TMemoryProfileGuard::TMemoryProfileGuard(const TString& id) - : Id(id) +TMemoryProfileGuard::TMemoryProfileGuard(const TString& id, const bool enabled) + : Id(enabled ? 
id : "") { - NProfiling::TMemoryTagScope::Reset(TLocalProcessKeyState::GetInstance().Register(Id + "-Start")); + if (enabled) { + NProfiling::TMemoryTagScope::Reset(TLocalProcessKeyState::GetInstance().Register(Id + "-Start")); + } } TMemoryProfileGuard::~TMemoryProfileGuard() { - NProfiling::TMemoryTagScope::Reset(TLocalProcessKeyState::GetInstance().Register(Id + "-Finish")); + if (Id) { + NProfiling::TMemoryTagScope::Reset(TLocalProcessKeyState::GetInstance().Register(Id + "-Finish")); + } } diff --git a/ydb/library/actors/prof/tag.h b/ydb/library/actors/prof/tag.h index 1624d9d1e0f9..cde3544bc3c6 100644 --- a/ydb/library/actors/prof/tag.h +++ b/ydb/library/actors/prof/tag.h @@ -78,7 +78,7 @@ class TMemoryProfileGuard: TNonCopyable { private: const TString Id; public: - TMemoryProfileGuard(const TString& id); + TMemoryProfileGuard(const TString& id, const bool enabled = true); ~TMemoryProfileGuard(); }; diff --git a/ydb/library/conclusion/result.h b/ydb/library/conclusion/result.h index 0d9af2fbdcde..cd4034a754c2 100644 --- a/ydb/library/conclusion/result.h +++ b/ydb/library/conclusion/result.h @@ -30,11 +30,13 @@ class TConclusion { Y_ABORT_UNLESS(IsFail()); } - TConclusion(TResult&& result) + template + TConclusion(TResultArg&& result) : Result(std::move(result)) { } - TConclusion(const TResult& result) + template + TConclusion(const TResultArg& result) : Result(result) { } @@ -78,8 +80,8 @@ class TConclusion { return IsFail(); } - explicit operator bool() const { - return IsSuccess(); + operator TConclusionStatus() const { + return GetError(); } const TString& GetErrorMessage() const { diff --git a/ydb/library/conclusion/status.cpp b/ydb/library/conclusion/status.cpp index e946a122914b..30b85520e24e 100644 --- a/ydb/library/conclusion/status.cpp +++ b/ydb/library/conclusion/status.cpp @@ -1,5 +1,10 @@ #include "status.h" +#include namespace NKikimr { +void TConclusionStatus::Validate() const { + AFL_VERIFY(Ok())("problem", GetErrorMessage()); +} + } diff 
--git a/ydb/library/conclusion/status.h b/ydb/library/conclusion/status.h index 86e018cd3407..97b45911c678 100644 --- a/ydb/library/conclusion/status.h +++ b/ydb/library/conclusion/status.h @@ -20,11 +20,17 @@ class TConclusionStatus { TConclusionStatus(const char* errorMessage, Ydb::StatusIds::StatusCode status = Ydb::StatusIds::INTERNAL_ERROR) : ErrorMessage(errorMessage) - , Status(status) - { + , Status(status) { + Y_ABORT_UNLESS(!!ErrorMessage); + } + + TConclusionStatus(const std::string& errorMessage, Ydb::StatusIds::StatusCode status = Ydb::StatusIds::INTERNAL_ERROR) + : ErrorMessage(TString(errorMessage.data(), errorMessage.size())) + , Status(status) { Y_ABORT_UNLESS(!!ErrorMessage); } public: + void Validate() const; const TString& GetErrorMessage() const { return ErrorMessage ? *ErrorMessage : Default(); @@ -42,10 +48,18 @@ class TConclusionStatus { return TConclusionStatus(errorMessage); } + static TConclusionStatus Fail(const std::string& errorMessage) { + return TConclusionStatus(errorMessage); + } + bool IsFail() const { return !Ok(); } + bool IsSuccess() const { + return Ok(); + } + bool Ok() const { return !ErrorMessage; } @@ -54,10 +68,6 @@ class TConclusionStatus { return !!ErrorMessage; } - explicit operator bool() const { - return Ok(); - } - static TConclusionStatus Success() { return TConclusionStatus(); } diff --git a/ydb/library/conclusion/ya.make b/ydb/library/conclusion/ya.make index dd50013dc5a4..e6e350a5a55a 100644 --- a/ydb/library/conclusion/ya.make +++ b/ydb/library/conclusion/ya.make @@ -7,6 +7,7 @@ SRCS( PEERDIR( ydb/public/api/protos + ydb/library/actors/core ) END() diff --git a/ydb/library/services/services.proto b/ydb/library/services/services.proto index 41b130bf408b..e78aee0f0699 100644 --- a/ydb/library/services/services.proto +++ b/ydb/library/services/services.proto @@ -72,6 +72,7 @@ enum EServiceKikimr { TX_COLUMNSHARD = 332; BLOB_CACHE = 333; TX_COLUMNSHARD_SCAN = 334; + TX_COLUMNSHARD_SCAN_MEMORY = 335; // BLOBSTORAGE 
again BS_HANDOFF = 298; diff --git a/ydb/public/lib/ydb_cli/commands/ydb_service_scheme.cpp b/ydb/public/lib/ydb_cli/commands/ydb_service_scheme.cpp index 537f66baf7ee..b5d028c1dcb6 100644 --- a/ydb/public/lib/ydb_cli/commands/ydb_service_scheme.cpp +++ b/ydb/public/lib/ydb_cli/commands/ydb_service_scheme.cpp @@ -858,6 +858,8 @@ void TCommandList::Config(TConfig& config) { .StoreTrue(&Recursive); config.Opts->AddCharOption('1', "List one object per line") .StoreTrue(&FromNewLine); + config.Opts->AddCharOption('m', "Multithread recursive request") + .StoreTrue(&Multithread); AddFormats(config, { EOutputFormat::Pretty, EOutputFormat::Json }); config.SetFreeArgsMax(1); SetFreeArgTitle(0, "", "Path to list"); @@ -877,6 +879,7 @@ int TCommandList::Run(TConfig& config) { ISchemePrinter::TSettings settings = { Path, Recursive, + Multithread, FromNewLine, FillSettings(NScheme::TListDirectorySettings()), FillSettings(NTable::TDescribeTableSettings().WithTableStatistics(true)) diff --git a/ydb/public/lib/ydb_cli/commands/ydb_service_scheme.h b/ydb/public/lib/ydb_cli/commands/ydb_service_scheme.h index 080a5ac91a45..fe3a089305f3 100644 --- a/ydb/public/lib/ydb_cli/commands/ydb_service_scheme.h +++ b/ydb/public/lib/ydb_cli/commands/ydb_service_scheme.h @@ -102,6 +102,7 @@ class TCommandList : public TYdbOperationCommand, public TCommandWithPath, publi bool AdvancedMode = false; bool Recursive = false; bool FromNewLine = false; + bool Multithread = false; }; class TCommandPermissions : public TClientCommandTree { diff --git a/ydb/public/lib/ydb_cli/common/recursive_remove.cpp b/ydb/public/lib/ydb_cli/common/recursive_remove.cpp index d42b58fe4529..3c9946e29c55 100644 --- a/ydb/public/lib/ydb_cli/common/recursive_remove.cpp +++ b/ydb/public/lib/ydb_cli/common/recursive_remove.cpp @@ -72,18 +72,18 @@ template using TRemoveFunc = TStatus(*)(TClient&, const TString&, const TSettings&); template -TStatus Remove(TRemoveFunc func, TSchemeClient& schemeClient, TClient* client, const 
TSchemeEntry& entry, - ERecursiveRemovePrompt prompt, const TRemoveDirectorySettings& settings) +TStatus Remove(TRemoveFunc func, TSchemeClient& schemeClient, TClient* client, const ESchemeEntryType type, + const TString& path, ERecursiveRemovePrompt prompt, const TRemoveDirectorySettings& settings) { if (!client) { return TStatus(EStatus::GENERIC_ERROR, MakeIssues(TStringBuilder() << TypeName() << " not specified")); } - if (Prompt(prompt, entry.Name, entry.Type, false)) { - auto status = func(*client, entry.Name, TSettings(settings)); - if (status.GetStatus() == EStatus::SCHEME_ERROR && schemeClient.DescribePath(entry.Name).ExtractValueSync().GetStatus() == EStatus::SCHEME_ERROR) { - Cerr << "WARNING: Couldn't delete path: \'" << entry.Name << "\'. It was probably already deleted in another process" << Endl; + if (Prompt(prompt, path, type, false)) { + auto status = func(*client, path, TSettings(settings)); + if (status.GetStatus() == EStatus::SCHEME_ERROR && schemeClient.DescribePath(path).ExtractValueSync().GetStatus() == EStatus::SCHEME_ERROR) { + Cerr << "WARNING: Couldn't delete path: \'" << path << "\'. 
It was probably already deleted in another process" << Endl; return TStatus(EStatus::SUCCESS, {}); } return status; @@ -93,26 +93,26 @@ TStatus Remove(TRemoveFunc func, TSchemeClient& schemeClient } TStatus Remove( - TSchemeClient& schemeClient, TTableClient* tableClient, TTopicClient* topicClient, const TSchemeEntry& entry, - ERecursiveRemovePrompt prompt, const TRemoveDirectorySettings& settings) + TSchemeClient& schemeClient, TTableClient* tableClient, TTopicClient* topicClient, const ESchemeEntryType type, + const TString& path, ERecursiveRemovePrompt prompt, const TRemoveDirectorySettings& settings) { - switch (entry.Type) { + switch (type) { case ESchemeEntryType::Directory: - return Remove(&RemoveDirectory, schemeClient, &schemeClient, entry, prompt, settings); + return Remove(&RemoveDirectory, schemeClient, &schemeClient, type, path, prompt, settings); case ESchemeEntryType::ColumnStore: - return Remove(&RemoveColumnStore, schemeClient, tableClient, entry, prompt, settings); + return Remove(&RemoveColumnStore, schemeClient, tableClient, type, path, prompt, settings); case ESchemeEntryType::ColumnTable: case ESchemeEntryType::Table: - return Remove(&RemoveTable, schemeClient, tableClient, entry, prompt, settings); + return Remove(&RemoveTable, schemeClient, tableClient, type, path, prompt, settings); case ESchemeEntryType::Topic: - return Remove(&RemoveTopic, schemeClient, topicClient, entry, prompt, settings); + return Remove(&RemoveTopic, schemeClient, topicClient, type, path, prompt, settings); default: return TStatus(EStatus::UNSUPPORTED, MakeIssues(TStringBuilder() - << "Unsupported entry type: " << entry.Type)); + << "Unsupported entry type: " << type)); } } @@ -144,7 +144,7 @@ TStatus RemoveDirectoryRecursive( // output order is: Root, Recursive(children)... 
// we need to reverse it to delete recursively for (auto it = recursiveListResult.Entries.rbegin(); it != recursiveListResult.Entries.rend(); ++it) { - if (auto result = Remove(schemeClient, tableClient, topicClient, *it, prompt, settings); !result.IsSuccess()) { + if (auto result = Remove(schemeClient, tableClient, topicClient, it->Type, it->Name, prompt, settings); !result.IsSuccess()) { return result; } if (createProgressBar) { @@ -189,7 +189,7 @@ NYdb::TStatus RemovePathRecursive(NScheme::TSchemeClient& schemeClient, NTable:: case ESchemeEntryType::ColumnStore: return RemoveDirectoryRecursive(schemeClient, tableClient, topicClient, path, prompt, settings, true, createProgressBar); default: - return Remove(schemeClient, &tableClient, &topicClient, entity.GetEntry(), prompt, settings); + return Remove(schemeClient, &tableClient, &topicClient, entity.GetEntry().Type, path, prompt, settings); } } } diff --git a/ydb/public/lib/ydb_cli/common/scheme_printers.cpp b/ydb/public/lib/ydb_cli/common/scheme_printers.cpp index 24fd0ce7945c..9175afc7e591 100644 --- a/ydb/public/lib/ydb_cli/common/scheme_printers.cpp +++ b/ydb/public/lib/ydb_cli/common/scheme_printers.cpp @@ -15,7 +15,7 @@ TSchemePrinterBase::TSchemePrinterBase(const TDriver& driver, TSettings&& settin {} void TSchemePrinterBase::Print() { - PrintDirectoryRecursive(Settings.Path, ""); + PrintDirectoryRecursive(Settings.Path, "").GetValueSync(); } bool TSchemePrinterBase::IsDirectoryLike(const NScheme::TSchemeEntry& entry) { @@ -24,30 +24,41 @@ bool TSchemePrinterBase::IsDirectoryLike(const NScheme::TSchemeEntry& entry) { || entry.Type == NScheme::ESchemeEntryType::ColumnStore; } -void TSchemePrinterBase::PrintDirectoryRecursive(const TString& fullPath, const TString& relativePath) { - NScheme::TListDirectoryResult result = SchemeClient.ListDirectory( +NThreading::TFuture TSchemePrinterBase::PrintDirectoryRecursive(const TString& fullPath, const TString& relativePath) { + return SchemeClient.ListDirectory( 
fullPath, Settings.ListDirectorySettings - ).GetValueSync(); - ThrowOnError(result); + ).Apply([this, fullPath, relativePath](const NScheme::TAsyncListDirectoryResult& resultFuture) { + const auto& result = resultFuture.GetValueSync(); + ThrowOnError(result); - if (relativePath || IsDirectoryLike(result.GetEntry())) { - PrintDirectory(relativePath, result); - } else { - PrintEntry(relativePath, result.GetEntry()); - } + if (relativePath || IsDirectoryLike(result.GetEntry())) { + std::lock_guard g(Lock); + PrintDirectory(relativePath, result); + } else { + std::lock_guard g(Lock); + PrintEntry(relativePath, result.GetEntry()); + } - if (Settings.Recursive) { - for (const auto& child : result.GetChildren()) { - TString childRelativePath = relativePath + (relativePath ? "/" : "") + child.Name; - TString childFullPath = fullPath + "/" + child.Name; - if (IsDirectoryLike(child)) { - PrintDirectoryRecursive(childFullPath, childRelativePath); - } else { - PrintEntry(childRelativePath, child); + TVector> childFutures; + if (Settings.Recursive) { + for (const auto& child : result.GetChildren()) { + TString childRelativePath = relativePath + (relativePath ? 
"/" : "") + child.Name; + TString childFullPath = fullPath + "/" + child.Name; + if (IsDirectoryLike(child)) { + childFutures.push_back(PrintDirectoryRecursive(childFullPath, childRelativePath)); + if (!Settings.Multithread) { + childFutures.back().Wait(); + childFutures.back().TryRethrow(); + } + } else { + std::lock_guard g(Lock); + PrintEntry(childRelativePath, child); + } } } - } + return NThreading::WaitExceptionOrAll(childFutures); + }); } NTable::TDescribeTableResult TSchemePrinterBase::DescribeTable(const TString& relativePath) { diff --git a/ydb/public/lib/ydb_cli/common/scheme_printers.h b/ydb/public/lib/ydb_cli/common/scheme_printers.h index 84b791d70b19..c8ca0a2fd322 100644 --- a/ydb/public/lib/ydb_cli/common/scheme_printers.h +++ b/ydb/public/lib/ydb_cli/common/scheme_printers.h @@ -13,6 +13,7 @@ class ISchemePrinter { struct TSettings { TString Path; bool Recursive; + bool Multithread; bool FromNewLine; NScheme::TListDirectorySettings ListDirectorySettings; NTable::TDescribeTableSettings DescribeTableSettings; @@ -37,13 +38,14 @@ class TSchemePrinterBase : public ISchemePrinter { NTable::TDescribeTableResult DescribeTable(const TString& relativePath); private: - void PrintDirectoryRecursive(const TString& fullPath, const TString& relativePath); + NThreading::TFuture PrintDirectoryRecursive(const TString& fullPath, const TString& relativePath); static bool IsDirectoryLike(const NScheme::TSchemeEntry& entry); protected: NTable::TTableClient TableClient; NScheme::TSchemeClient SchemeClient; const TSettings Settings; + std::mutex Lock; }; class TDefaultSchemePrinter : public TSchemePrinterBase { diff --git a/ydb/services/bg_tasks/abstract/interface.h b/ydb/services/bg_tasks/abstract/interface.h index ff8565a353fa..fd556a2e60b0 100644 --- a/ydb/services/bg_tasks/abstract/interface.h +++ b/ydb/services/bg_tasks/abstract/interface.h @@ -332,7 +332,7 @@ class TInterfaceProtoContainer: public TCommonInterfaceContainer { if (!Object) { return result; } - result 
= Object->SerializeToProto(); + Object->SerializeToProto(result); TOperatorPolicy::SetClassName(result, Object->GetClassName()); return result; } diff --git a/ydb/services/metadata/common/ya.make b/ydb/services/metadata/common/ya.make index d7f364bbcdd4..59267fd0a1f0 100644 --- a/ydb/services/metadata/common/ya.make +++ b/ydb/services/metadata/common/ya.make @@ -9,7 +9,7 @@ PEERDIR( ydb/services/metadata/initializer ydb/services/metadata/abstract ydb/services/bg_tasks/abstract - ydb/core/tx/schemeshard + ydb/core/tx/scheme_cache ) YQL_LAST_ABI_VERSION() diff --git a/ydb/services/metadata/manager/abstract.h b/ydb/services/metadata/manager/abstract.h index 132afb8a8a12..71c49dbf8225 100644 --- a/ydb/services/metadata/manager/abstract.h +++ b/ydb/services/metadata/manager/abstract.h @@ -139,7 +139,7 @@ class IObjectOperationsManager: public IOperationsManager { TOperationParsingResult BuildPatchFromSettings(const NYql::TObjectSettingsImpl& settings, IOperationsManager::TInternalModificationContext& context) const { TOperationParsingResult result = DoBuildPatchFromSettings(settings, context); - if (result) { + if (result.IsSuccess()) { if (!settings.GetFeaturesExtractor().IsFinished()) { return TConclusionStatus::Fail("undefined params: " + settings.GetFeaturesExtractor().GetRemainedParamsString()); } diff --git a/ydb/services/metadata/secret/secret.h b/ydb/services/metadata/secret/secret.h index f0d35df9ce3d..70920091bf31 100644 --- a/ydb/services/metadata/secret/secret.h +++ b/ydb/services/metadata/secret/secret.h @@ -106,7 +106,9 @@ class TSecretIdOrValue { static std::optional DeserializeFromOptional(const NKikimrSchemeOp::TSecretableVariable& proto, const TString& secretInfo, const TString& defaultOwnerId = Default()) { if (proto.HasSecretId()) { return DeserializeFromProto(proto, defaultOwnerId); - } else if (secretInfo) { + } else if (proto.HasValue()) { + return DeserializeFromString(proto.GetValue().GetData()); + } if (secretInfo) { return 
DeserializeFromString(secretInfo, defaultOwnerId); } else { return {}; diff --git a/ydb/services/ydb/ydb_logstore_ut.cpp b/ydb/services/ydb/ydb_logstore_ut.cpp index 44b08f43c964..a85072d2ae88 100644 --- a/ydb/services/ydb/ydb_logstore_ut.cpp +++ b/ydb/services/ydb/ydb_logstore_ut.cpp @@ -163,7 +163,7 @@ Y_UNIT_TEST_SUITE(YdbLogStore) { } { // wrong schema: not supported PK - NYdb::NLogStore::TSchema logSchema(TestSchemaColumns(), {"json_payload", "resource_id"}); + NYdb::NLogStore::TSchema logSchema(TestSchemaColumns(EPrimitiveType::Double), {"json_payload", "resource_id"}); THashMap schemaPresets; schemaPresets["default"] = logSchema; NYdb::NLogStore::TLogStoreDescription storeDescr(4, schemaPresets); @@ -333,8 +333,10 @@ Y_UNIT_TEST_SUITE(YdbLogStore) { auto res = schemaClient.ListDirectory("/Root/LogStore/.sys").GetValueSync(); UNIT_ASSERT_VALUES_EQUAL_C(res.GetStatus(), EStatus::SUCCESS, res.GetIssues().ToString()); auto children = res.GetChildren(); - UNIT_ASSERT_VALUES_EQUAL(children.size(), 1); - UNIT_ASSERT_VALUES_EQUAL(children[0].Name, "store_primary_index_stats"); + UNIT_ASSERT_VALUES_EQUAL(children.size(), 3); + UNIT_ASSERT_VALUES_EQUAL(children[0].Name, "store_primary_index_granule_stats"); + UNIT_ASSERT_VALUES_EQUAL(children[1].Name, "store_primary_index_portion_stats"); + UNIT_ASSERT_VALUES_EQUAL(children[2].Name, "store_primary_index_stats"); } { @@ -351,8 +353,10 @@ Y_UNIT_TEST_SUITE(YdbLogStore) { auto res = schemaClient.ListDirectory("/Root/LogStore/log1/.sys").GetValueSync(); UNIT_ASSERT_VALUES_EQUAL_C(res.GetStatus(), EStatus::SUCCESS, res.GetIssues().ToString()); auto children = res.GetChildren(); - UNIT_ASSERT_VALUES_EQUAL(children.size(), 1); - UNIT_ASSERT_VALUES_EQUAL(children[0].Name, "primary_index_stats"); + UNIT_ASSERT_VALUES_EQUAL(children.size(), 3); + UNIT_ASSERT_VALUES_EQUAL(children[0].Name, "primary_index_granule_stats"); + UNIT_ASSERT_VALUES_EQUAL(children[1].Name, "primary_index_portion_stats"); + 
UNIT_ASSERT_VALUES_EQUAL(children[2].Name, "primary_index_stats"); } { diff --git a/ydb/services/ydb/ydb_olapstore_ut.cpp b/ydb/services/ydb/ydb_olapstore_ut.cpp index 00dd55c4cf93..d570e9cc73cc 100644 --- a/ydb/services/ydb/ydb_olapstore_ut.cpp +++ b/ydb/services/ydb/ydb_olapstore_ut.cpp @@ -374,9 +374,9 @@ Y_UNIT_TEST_SUITE(YdbOlapStore) { } } - Y_UNIT_TEST_TWIN(BulkUpsert, NotNull) { + Y_UNIT_TEST(BulkUpsert) { for (auto& [type, name] : allowedTypes) { - TestBulkUpsert(type); + TestBulkUpsert(type); } } @@ -412,9 +412,9 @@ Y_UNIT_TEST_SUITE(YdbOlapStore) { CompareQueryResults(connection, "log1", "SELECT count(*) FROM ;"); } - Y_UNIT_TEST_TWIN(ManyTables, NotNull) { + Y_UNIT_TEST(ManyTables) { for (auto& sharding : testShardingVariants) { - TestManyTables(sharding); + TestManyTables(sharding); } } @@ -465,9 +465,9 @@ Y_UNIT_TEST_SUITE(YdbOlapStore) { } } - Y_UNIT_TEST_TWIN(DuplicateRows, NotNull) { + Y_UNIT_TEST(DuplicateRows) { for (auto& sharding : testShardingVariants) { - TestDuplicateRows(sharding); + TestDuplicateRows(sharding); } } @@ -496,7 +496,7 @@ Y_UNIT_TEST_SUITE(YdbOlapStore) { CompareQueryResults(connection, "log1", query); } - Y_UNIT_TEST_TWIN(LogLast50, NotNull) { + Y_UNIT_TEST(LogLast50) { TString query(R"( SELECT `timestamp`, `resource_type`, `resource_id`, `uid`, `level`, `message` FROM
@@ -505,11 +505,11 @@ Y_UNIT_TEST_SUITE(YdbOlapStore) { )"); for (auto& sharding : testShardingVariants) { - TestQuery(query, sharding); + TestQuery(query, sharding); } } - Y_UNIT_TEST_TWIN(LogLast50ByResource, NotNull) { + Y_UNIT_TEST(LogLast50ByResource) { TString query(R"( SELECT `timestamp`, `resource_type`, `resource_id`, `uid`, `level`, `message` FROM
@@ -519,11 +519,11 @@ Y_UNIT_TEST_SUITE(YdbOlapStore) { )"); for (auto& sharding : testShardingVariants) { - TestQuery(query, sharding); + TestQuery(query, sharding); } } - Y_UNIT_TEST_TWIN(LogGrepNonExisting, NotNull) { + Y_UNIT_TEST(LogGrepNonExisting) { TString query(R"( SELECT `timestamp`, `resource_type`, `resource_id`, `uid`, `level`, `message` FROM
@@ -533,11 +533,11 @@ Y_UNIT_TEST_SUITE(YdbOlapStore) { )"); for (auto& sharding : testShardingVariants) { - TestQuery(query, sharding); + TestQuery(query, sharding); } } - Y_UNIT_TEST_TWIN(LogGrepExisting, NotNull) { + Y_UNIT_TEST(LogGrepExisting) { TString query(R"( SELECT `timestamp`, `resource_type`, `resource_id`, `uid`, `level`, `message` FROM
@@ -547,11 +547,11 @@ Y_UNIT_TEST_SUITE(YdbOlapStore) { )"); for (auto& sharding : testShardingVariants) { - TestQuery(query, sharding); + TestQuery(query, sharding); } } - Y_UNIT_TEST_TWIN(LogNonExistingRequest, NotNull) { + Y_UNIT_TEST(LogNonExistingRequest) { TString query(R"( $request_id = '0xfaceb00c'; @@ -563,11 +563,11 @@ Y_UNIT_TEST_SUITE(YdbOlapStore) { )"); for (auto& sharding : testShardingVariants) { - TestQuery(query, sharding); + TestQuery(query, sharding); } } - Y_UNIT_TEST_TWIN(LogExistingRequest, NotNull) { + Y_UNIT_TEST(LogExistingRequest) { TString query(R"( $request_id = '1f'; @@ -579,11 +579,11 @@ Y_UNIT_TEST_SUITE(YdbOlapStore) { )"); for (auto& sharding : testShardingVariants) { - TestQuery(query, sharding); + TestQuery(query, sharding); } } - Y_UNIT_TEST_TWIN(LogNonExistingUserId, NotNull) { + Y_UNIT_TEST(LogNonExistingUserId) { // Should be fixed after Arrow kernel implementation for JSON_VALUE // https://st.yandex-team.ru/KIKIMR-17903 TString query(R"( @@ -598,11 +598,11 @@ Y_UNIT_TEST_SUITE(YdbOlapStore) { )"); for (auto& sharding : testShardingVariants) { - TestQuery(query, sharding); + TestQuery(query, sharding); } } - Y_UNIT_TEST_TWIN(LogExistingUserId, NotNull) { + Y_UNIT_TEST(LogExistingUserId) { // Should be fixed after Arrow kernel implementation for JSON_VALUE // https://st.yandex-team.ru/KIKIMR-17903 TString query(R"( @@ -617,11 +617,11 @@ Y_UNIT_TEST_SUITE(YdbOlapStore) { )"); for (auto& sharding : testShardingVariants) { - TestQuery(query, sharding); + TestQuery(query, sharding); } } - Y_UNIT_TEST_TWIN(LogPagingBefore, NotNull) { + Y_UNIT_TEST(LogPagingBefore) { TString query(R"( PRAGMA kikimr.OptEnablePredicateExtract = "true"; @@ -639,11 +639,11 @@ Y_UNIT_TEST_SUITE(YdbOlapStore) { )"); for (auto& sharding : testShardingVariants) { - TestQuery(query, sharding); + TestQuery(query, sharding); } } - Y_UNIT_TEST_TWIN(LogPagingBetween, NotNull) { + Y_UNIT_TEST(LogPagingBetween) { TString query(R"( PRAGMA 
kikimr.OptEnablePredicateExtract = "true"; @@ -667,11 +667,11 @@ Y_UNIT_TEST_SUITE(YdbOlapStore) { )"); for (auto& sharding : testShardingVariants) { - TestQuery(query, sharding); + TestQuery(query, sharding); } } - Y_UNIT_TEST_TWIN(LogPagingAfter, NotNull) { + Y_UNIT_TEST(LogPagingAfter) { TString query(R"( PRAGMA kikimr.OptEnablePredicateExtract = "true"; @@ -695,11 +695,11 @@ Y_UNIT_TEST_SUITE(YdbOlapStore) { )"); for (auto& sharding : testShardingVariants) { - TestQuery(query, sharding); + TestQuery(query, sharding); } } - Y_UNIT_TEST_TWIN(LogCountByResource, NotNull) { + Y_UNIT_TEST(LogCountByResource) { TString query(R"( SELECT count(*) FROM
@@ -708,11 +708,11 @@ Y_UNIT_TEST_SUITE(YdbOlapStore) { )"); for (auto& sharding : testShardingVariants) { - TestQuery(query, sharding); + TestQuery(query, sharding); } } - Y_UNIT_TEST_TWIN(LogWithUnionAllAscending, NotNull) { + Y_UNIT_TEST(LogWithUnionAllAscending) { TString query(R"( PRAGMA AnsiInForEmptyOrNullableItemsCollections; @@ -740,11 +740,11 @@ Y_UNIT_TEST_SUITE(YdbOlapStore) { )"); for (auto& sharding : testShardingVariants) { - TestQuery(query, sharding); + TestQuery(query, sharding); } } - Y_UNIT_TEST_TWIN(LogWithUnionAllDescending, NotNull) { + Y_UNIT_TEST(LogWithUnionAllDescending) { TString query(R"( PRAGMA AnsiInForEmptyOrNullableItemsCollections; @@ -772,11 +772,11 @@ Y_UNIT_TEST_SUITE(YdbOlapStore) { )"); for (auto& sharding : testShardingVariants) { - TestQuery(query, sharding); + TestQuery(query, sharding); } } - Y_UNIT_TEST_TWIN(LogTsRangeDescending, NotNull) { + Y_UNIT_TEST(LogTsRangeDescending) { TString query(R"( --PRAGMA AnsiInForEmptyOrNullableItemsCollections; @@ -794,7 +794,7 @@ Y_UNIT_TEST_SUITE(YdbOlapStore) { )"); for (auto& sharding : testShardingVariants) { - TestQuery(query, sharding); + TestQuery(query, sharding); } } } diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-0 b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-0 index 0b67e9ca81bc..4eceb750868b 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-0 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-0 @@ -121,6 +121,89 @@ } ] }, + "SimplifiedPlan": { + "Node Type": "Query", + "PlanNodeId": 0, + "PlanNodeType": "Query", + "Plans": [ + { + "Node Type": "ResultSet_1", + "PlanNodeId": 1, + "PlanNodeType": "ResultSet", + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Name": "Aggregate" + } + ], + "PlanNodeId": 4, + "Plans": [ + { + "Node Type": 
"Limit", + "Operators": [ + { + "Limit": "1", + "Name": "Limit" + } + ], + "PlanNodeId": 5, + "Plans": [ + { + "Node Type": "TableFullScan", + "Operators": [ + { + "Name": "TableFullScan", + "ReadColumns": null, + "ReadRanges": [ + "EventTime (-\u221e, +\u221e)", + "CounterID (-\u221e, +\u221e)", + "EventDate (-\u221e, +\u221e)", + "UserID (-\u221e, +\u221e)", + "WatchID (-\u221e, +\u221e)" + ], + "SsaProgram": { + "Command": [ + { + "GroupBy": { + "Aggregates": [ + { + "Column": { + "Id": 106 + }, + "Function": { + "Id": 2 + } + } + ] + } + }, + { + "Projection": { + "Columns": [ + { + "Id": 106 + } + ] + } + } + ], + "Version": 4 + }, + "Table": "clickbench/plans/column/hits" + } + ], + "PlanNodeId": 7 + } + ] + } + ] + } + ] + } + ] + }, "tables": [ { "name": "/local/clickbench/plans/column/hits", diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-1 b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-1 index 764e1b46f706..65688d39bcba 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-1 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-1 @@ -105,7 +105,8 @@ } ], "FunctionType": 2, - "KernelIdx": 0 + "KernelIdx": 0, + "YqlOperationId": 12 } } }, @@ -159,6 +160,128 @@ } ] }, + "SimplifiedPlan": { + "Node Type": "Query", + "PlanNodeId": 0, + "PlanNodeType": "Query", + "Plans": [ + { + "Node Type": "ResultSet_1", + "PlanNodeId": 1, + "PlanNodeType": "ResultSet", + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Name": "Aggregate" + } + ], + "PlanNodeId": 4, + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "1", + "Name": "Limit" + } + ], + "PlanNodeId": 5, + "Plans": [ + { + "Node Type": "TableFullScan", + "Operators": [ + { + "Name": "TableFullScan", + "ReadColumns": [ + "AdvEngineID" + ], + "ReadRanges": [ + 
"EventTime (-\u221e, +\u221e)", + "CounterID (-\u221e, +\u221e)", + "EventDate (-\u221e, +\u221e)", + "UserID (-\u221e, +\u221e)", + "WatchID (-\u221e, +\u221e)" + ], + "SsaProgram": { + "Command": [ + { + "Assign": { + "Column": { + "Id": 106 + }, + "Constant": { + "Int32": 0 + } + } + }, + { + "Assign": { + "Column": { + "Id": 107 + }, + "Function": { + "Arguments": [ + { + "Id": 41 + }, + { + "Id": 106 + } + ], + "FunctionType": 2, + "KernelIdx": 0, + "YqlOperationId": 12 + } + } + }, + { + "Filter": { + "Predicate": { + "Id": 107 + } + } + }, + { + "GroupBy": { + "Aggregates": [ + { + "Column": { + "Id": 108 + }, + "Function": { + "Id": 2 + } + } + ] + } + }, + { + "Projection": { + "Columns": [ + { + "Id": 108 + } + ] + } + } + ], + "Version": 4 + }, + "Table": "clickbench/plans/column/hits" + } + ], + "PlanNodeId": 7 + } + ] + } + ] + } + ] + } + ] + }, "tables": [ { "name": "/local/clickbench/plans/column/hits", diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-10 b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-10 index 0ffd1a2ec833..9f33d02c188c 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-10 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-10 @@ -137,7 +137,8 @@ } ], "FunctionType": 2, - "KernelIdx": 0 + "KernelIdx": 0, + "YqlOperationId": 12 } } }, @@ -190,6 +191,146 @@ } ] }, + "SimplifiedPlan": { + "Node Type": "Query", + "PlanNodeId": 0, + "PlanNodeType": "Query", + "Plans": [ + { + "Node Type": "ResultSet", + "PlanNodeId": 1, + "PlanNodeType": "ResultSet", + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "10", + "Name": "Limit" + } + ], + "PlanNodeId": 2, + "Plans": [ + { + "Node Type": "TopSort", + "Operators": [ + { + "Limit": "10", + "Name": "TopSort", + "TopSortBy": "argument.Count0" + } + ], + 
"PlanNodeId": 4, + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Aggregation": "{_yql_agg_0: Inc(state._yql_agg_0)}", + "GroupBy": "item.MobilePhoneModel", + "Name": "Aggregate" + } + ], + "PlanNodeId": 6, + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Aggregation": "state", + "GroupBy": "", + "Name": "Aggregate" + } + ], + "PlanNodeId": 8, + "Plans": [ + { + "Node Type": "TableFullScan", + "Operators": [ + { + "Name": "TableFullScan", + "ReadColumns": [ + "MobilePhoneModel", + "UserID" + ], + "ReadRanges": [ + "EventTime (-\u221e, +\u221e)", + "CounterID (-\u221e, +\u221e)", + "EventDate (-\u221e, +\u221e)", + "UserID (-\u221e, +\u221e)", + "WatchID (-\u221e, +\u221e)" + ], + "SsaProgram": { + "Command": [ + { + "Assign": { + "Column": { + "Id": 106 + }, + "Constant": { + "Bytes": "" + } + } + }, + { + "Assign": { + "Column": { + "Id": 107 + }, + "Function": { + "Arguments": [ + { + "Id": 35 + }, + { + "Id": 106 + } + ], + "FunctionType": 2, + "KernelIdx": 0, + "YqlOperationId": 12 + } + } + }, + { + "Filter": { + "Predicate": { + "Id": 107 + } + } + }, + { + "Projection": { + "Columns": [ + { + "Id": 35 + }, + { + "Id": 10 + } + ] + } + } + ], + "Version": 4 + }, + "Table": "clickbench/plans/column/hits" + } + ], + "PlanNodeId": 9 + } + ] + } + ] + } + ] + } + ] + } + ] + } + ] + }, "tables": [ { "name": "/local/clickbench/plans/column/hits", diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-11 b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-11 index f2e4dbd35814..5a7e23644bd9 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-11 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-11 @@ -140,7 +140,8 @@ } ], "FunctionType": 2, - "KernelIdx": 0 + "KernelIdx": 0, + "YqlOperationId": 12 } } }, @@ -196,6 +197,150 @@ 
} ] }, + "SimplifiedPlan": { + "Node Type": "Query", + "PlanNodeId": 0, + "PlanNodeType": "Query", + "Plans": [ + { + "Node Type": "ResultSet", + "PlanNodeId": 1, + "PlanNodeType": "ResultSet", + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "10", + "Name": "Limit" + } + ], + "PlanNodeId": 2, + "Plans": [ + { + "Node Type": "TopSort", + "Operators": [ + { + "Limit": "10", + "Name": "TopSort", + "TopSortBy": "argument.Count0" + } + ], + "PlanNodeId": 4, + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Aggregation": "{_yql_agg_0: Inc(state._yql_agg_0)}", + "GroupBy": "", + "Name": "Aggregate" + } + ], + "PlanNodeId": 6, + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Aggregation": "state", + "GroupBy": "", + "Name": "Aggregate" + } + ], + "PlanNodeId": 8, + "Plans": [ + { + "Node Type": "TableFullScan", + "Operators": [ + { + "Name": "TableFullScan", + "ReadColumns": [ + "MobilePhone", + "MobilePhoneModel", + "UserID" + ], + "ReadRanges": [ + "EventTime (-\u221e, +\u221e)", + "CounterID (-\u221e, +\u221e)", + "EventDate (-\u221e, +\u221e)", + "UserID (-\u221e, +\u221e)", + "WatchID (-\u221e, +\u221e)" + ], + "SsaProgram": { + "Command": [ + { + "Assign": { + "Column": { + "Id": 106 + }, + "Constant": { + "Bytes": "" + } + } + }, + { + "Assign": { + "Column": { + "Id": 107 + }, + "Function": { + "Arguments": [ + { + "Id": 35 + }, + { + "Id": 106 + } + ], + "FunctionType": 2, + "KernelIdx": 0, + "YqlOperationId": 12 + } + } + }, + { + "Filter": { + "Predicate": { + "Id": 107 + } + } + }, + { + "Projection": { + "Columns": [ + { + "Id": 34 + }, + { + "Id": 35 + }, + { + "Id": 10 + } + ] + } + } + ], + "Version": 4 + }, + "Table": "clickbench/plans/column/hits" + } + ], + "PlanNodeId": 9 + } + ] + } + ] + } + ] + } + ] + } + ] + } + ] + }, "tables": [ { "name": "/local/clickbench/plans/column/hits", diff --git 
a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-12 b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-12 index 81b5d4e191e2..52260ffa6573 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-12 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-12 @@ -111,7 +111,8 @@ } ], "FunctionType": 2, - "KernelIdx": 0 + "KernelIdx": 0, + "YqlOperationId": 12 } } }, @@ -157,6 +158,129 @@ } ] }, + "SimplifiedPlan": { + "Node Type": "Query", + "PlanNodeId": 0, + "PlanNodeType": "Query", + "Plans": [ + { + "Node Type": "ResultSet", + "PlanNodeId": 1, + "PlanNodeType": "ResultSet", + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "10", + "Name": "Limit" + } + ], + "PlanNodeId": 2, + "Plans": [ + { + "Node Type": "TopSort", + "Operators": [ + { + "Limit": "10", + "Name": "TopSort", + "TopSortBy": "argument.Count0" + } + ], + "PlanNodeId": 4, + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Aggregation": "{_yql_agg_0: SUM(state._yql_agg_0,1)}", + "GroupBy": "item.SearchPhrase", + "Name": "Aggregate" + } + ], + "PlanNodeId": 6, + "Plans": [ + { + "Node Type": "TableFullScan", + "Operators": [ + { + "Name": "TableFullScan", + "ReadColumns": [ + "SearchPhrase" + ], + "ReadRanges": [ + "EventTime (-\u221e, +\u221e)", + "CounterID (-\u221e, +\u221e)", + "EventDate (-\u221e, +\u221e)", + "UserID (-\u221e, +\u221e)", + "WatchID (-\u221e, +\u221e)" + ], + "SsaProgram": { + "Command": [ + { + "Assign": { + "Column": { + "Id": 106 + }, + "Constant": { + "Bytes": "" + } + } + }, + { + "Assign": { + "Column": { + "Id": 107 + }, + "Function": { + "Arguments": [ + { + "Id": 40 + }, + { + "Id": 106 + } + ], + "FunctionType": 2, + "KernelIdx": 0, + "YqlOperationId": 12 + } + } + }, + { + "Filter": { + "Predicate": { + "Id": 107 + } + } + }, + { + 
"Projection": { + "Columns": [ + { + "Id": 40 + } + ] + } + } + ], + "Version": 4 + }, + "Table": "clickbench/plans/column/hits" + } + ], + "PlanNodeId": 7 + } + ] + } + ] + } + ] + } + ] + } + ] + }, "tables": [ { "name": "/local/clickbench/plans/column/hits", diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-13 b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-13 index b6ec7103da35..a6e7d891a6c4 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-13 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-13 @@ -137,7 +137,8 @@ } ], "FunctionType": 2, - "KernelIdx": 0 + "KernelIdx": 0, + "YqlOperationId": 12 } } }, @@ -190,6 +191,146 @@ } ] }, + "SimplifiedPlan": { + "Node Type": "Query", + "PlanNodeId": 0, + "PlanNodeType": "Query", + "Plans": [ + { + "Node Type": "ResultSet", + "PlanNodeId": 1, + "PlanNodeType": "ResultSet", + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "10", + "Name": "Limit" + } + ], + "PlanNodeId": 2, + "Plans": [ + { + "Node Type": "TopSort", + "Operators": [ + { + "Limit": "10", + "Name": "TopSort", + "TopSortBy": "argument.Count0" + } + ], + "PlanNodeId": 4, + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Aggregation": "{_yql_agg_0: Inc(state._yql_agg_0)}", + "GroupBy": "item.SearchPhrase", + "Name": "Aggregate" + } + ], + "PlanNodeId": 6, + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Aggregation": "state", + "GroupBy": "", + "Name": "Aggregate" + } + ], + "PlanNodeId": 8, + "Plans": [ + { + "Node Type": "TableFullScan", + "Operators": [ + { + "Name": "TableFullScan", + "ReadColumns": [ + "SearchPhrase", + "UserID" + ], + "ReadRanges": [ + "EventTime (-\u221e, +\u221e)", + "CounterID (-\u221e, +\u221e)", + "EventDate (-\u221e, +\u221e)", + "UserID (-\u221e, +\u221e)", 
+ "WatchID (-\u221e, +\u221e)" + ], + "SsaProgram": { + "Command": [ + { + "Assign": { + "Column": { + "Id": 106 + }, + "Constant": { + "Bytes": "" + } + } + }, + { + "Assign": { + "Column": { + "Id": 107 + }, + "Function": { + "Arguments": [ + { + "Id": 40 + }, + { + "Id": 106 + } + ], + "FunctionType": 2, + "KernelIdx": 0, + "YqlOperationId": 12 + } + } + }, + { + "Filter": { + "Predicate": { + "Id": 107 + } + } + }, + { + "Projection": { + "Columns": [ + { + "Id": 40 + }, + { + "Id": 10 + } + ] + } + } + ], + "Version": 4 + }, + "Table": "clickbench/plans/column/hits" + } + ], + "PlanNodeId": 9 + } + ] + } + ] + } + ] + } + ] + } + ] + } + ] + }, "tables": [ { "name": "/local/clickbench/plans/column/hits", diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-14 b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-14 index da4229ffa66f..8a33167f74fa 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-14 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-14 @@ -113,7 +113,8 @@ } ], "FunctionType": 2, - "KernelIdx": 0 + "KernelIdx": 0, + "YqlOperationId": 12 } } }, @@ -162,6 +163,133 @@ } ] }, + "SimplifiedPlan": { + "Node Type": "Query", + "PlanNodeId": 0, + "PlanNodeType": "Query", + "Plans": [ + { + "Node Type": "ResultSet", + "PlanNodeId": 1, + "PlanNodeType": "ResultSet", + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "10", + "Name": "Limit" + } + ], + "PlanNodeId": 2, + "Plans": [ + { + "Node Type": "TopSort", + "Operators": [ + { + "Limit": "10", + "Name": "TopSort", + "TopSortBy": "argument.Count0" + } + ], + "PlanNodeId": 4, + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Aggregation": "{_yql_agg_0: SUM(state._yql_agg_0,1)}", + "GroupBy": "", + "Name": "Aggregate" + } + ], + "PlanNodeId": 6, + "Plans": [ + { 
+ "Node Type": "TableFullScan", + "Operators": [ + { + "Name": "TableFullScan", + "ReadColumns": [ + "SearchEngineID", + "SearchPhrase" + ], + "ReadRanges": [ + "EventTime (-\u221e, +\u221e)", + "CounterID (-\u221e, +\u221e)", + "EventDate (-\u221e, +\u221e)", + "UserID (-\u221e, +\u221e)", + "WatchID (-\u221e, +\u221e)" + ], + "SsaProgram": { + "Command": [ + { + "Assign": { + "Column": { + "Id": 106 + }, + "Constant": { + "Bytes": "" + } + } + }, + { + "Assign": { + "Column": { + "Id": 107 + }, + "Function": { + "Arguments": [ + { + "Id": 40 + }, + { + "Id": 106 + } + ], + "FunctionType": 2, + "KernelIdx": 0, + "YqlOperationId": 12 + } + } + }, + { + "Filter": { + "Predicate": { + "Id": 107 + } + } + }, + { + "Projection": { + "Columns": [ + { + "Id": 39 + }, + { + "Id": 40 + } + ] + } + } + ], + "Version": 4 + }, + "Table": "clickbench/plans/column/hits" + } + ], + "PlanNodeId": 7 + } + ] + } + ] + } + ] + } + ] + } + ] + }, "tables": [ { "name": "/local/clickbench/plans/column/hits", diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-15 b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-15 index 6c67a5701a18..bc0e96d7a83a 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-15 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-15 @@ -121,6 +121,92 @@ } ] }, + "SimplifiedPlan": { + "Node Type": "Query", + "PlanNodeId": 0, + "PlanNodeType": "Query", + "Plans": [ + { + "Node Type": "ResultSet", + "PlanNodeId": 1, + "PlanNodeType": "ResultSet", + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "10", + "Name": "Limit" + } + ], + "PlanNodeId": 2, + "Plans": [ + { + "Node Type": "TopSort", + "Operators": [ + { + "Limit": "10", + "Name": "TopSort", + "TopSortBy": "argument.Count0" + } + ], + "PlanNodeId": 4, + "Plans": [ + { + "Node Type": 
"Aggregate", + "Operators": [ + { + "Aggregation": "{_yql_agg_0: SUM(state._yql_agg_0,1)}", + "GroupBy": "item.UserID", + "Name": "Aggregate" + } + ], + "PlanNodeId": 6, + "Plans": [ + { + "Node Type": "TableFullScan", + "Operators": [ + { + "Name": "TableFullScan", + "ReadColumns": [ + "UserID" + ], + "ReadRanges": [ + "EventTime (-\u221e, +\u221e)", + "CounterID (-\u221e, +\u221e)", + "EventDate (-\u221e, +\u221e)", + "UserID (-\u221e, +\u221e)", + "WatchID (-\u221e, +\u221e)" + ], + "SsaProgram": { + "Command": [ + { + "Projection": { + "Columns": [ + { + "Id": 10 + } + ] + } + } + ], + "Version": 4 + }, + "Table": "clickbench/plans/column/hits" + } + ], + "PlanNodeId": 7 + } + ] + } + ] + } + ] + } + ] + } + ] + }, "tables": [ { "name": "/local/clickbench/plans/column/hits", diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-16 b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-16 index 586eef700395..628fd15e1cff 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-16 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-16 @@ -126,6 +126,96 @@ } ] }, + "SimplifiedPlan": { + "Node Type": "Query", + "PlanNodeId": 0, + "PlanNodeType": "Query", + "Plans": [ + { + "Node Type": "ResultSet", + "PlanNodeId": 1, + "PlanNodeType": "ResultSet", + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "10", + "Name": "Limit" + } + ], + "PlanNodeId": 2, + "Plans": [ + { + "Node Type": "TopSort", + "Operators": [ + { + "Limit": "10", + "Name": "TopSort", + "TopSortBy": "argument.Count0" + } + ], + "PlanNodeId": 4, + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Aggregation": "{_yql_agg_0: SUM(state._yql_agg_0,1)}", + "GroupBy": "", + "Name": "Aggregate" + } + ], + "PlanNodeId": 6, + "Plans": [ + { + "Node Type": "TableFullScan", + 
"Operators": [ + { + "Name": "TableFullScan", + "ReadColumns": [ + "SearchPhrase", + "UserID" + ], + "ReadRanges": [ + "EventTime (-\u221e, +\u221e)", + "CounterID (-\u221e, +\u221e)", + "EventDate (-\u221e, +\u221e)", + "UserID (-\u221e, +\u221e)", + "WatchID (-\u221e, +\u221e)" + ], + "SsaProgram": { + "Command": [ + { + "Projection": { + "Columns": [ + { + "Id": 40 + }, + { + "Id": 10 + } + ] + } + } + ], + "Version": 4 + }, + "Table": "clickbench/plans/column/hits" + } + ], + "PlanNodeId": 7 + } + ] + } + ] + } + ] + } + ] + } + ] + }, "tables": [ { "name": "/local/clickbench/plans/column/hits", diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-17 b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-17 index 9bc0c3c3ba33..ab3fea86d54e 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-17 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-17 @@ -122,6 +122,95 @@ } ] }, + "SimplifiedPlan": { + "Node Type": "Query", + "PlanNodeId": 0, + "PlanNodeType": "Query", + "Plans": [ + { + "Node Type": "ResultSet", + "PlanNodeId": 1, + "PlanNodeType": "ResultSet", + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "10", + "Name": "Limit" + } + ], + "PlanNodeId": 2, + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "10", + "Name": "Limit" + } + ], + "PlanNodeId": 4, + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Aggregation": "{_yql_agg_0: SUM(state._yql_agg_0,1)}", + "GroupBy": "", + "Name": "Aggregate" + } + ], + "PlanNodeId": 6, + "Plans": [ + { + "Node Type": "TableFullScan", + "Operators": [ + { + "Name": "TableFullScan", + "ReadColumns": [ + "SearchPhrase", + "UserID" + ], + "ReadRanges": [ + "EventTime (-\u221e, +\u221e)", + "CounterID (-\u221e, +\u221e)", + "EventDate (-\u221e, +\u221e)", + 
"UserID (-\u221e, +\u221e)", + "WatchID (-\u221e, +\u221e)" + ], + "SsaProgram": { + "Command": [ + { + "Projection": { + "Columns": [ + { + "Id": 40 + }, + { + "Id": 10 + } + ] + } + } + ], + "Version": 4 + }, + "Table": "clickbench/plans/column/hits" + } + ], + "PlanNodeId": 7 + } + ] + } + ] + } + ] + } + ] + } + ] + }, "tables": [ { "name": "/local/clickbench/plans/column/hits", diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-18 b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-18 index ba67374831bf..456455447ec9 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-18 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-18 @@ -131,6 +131,100 @@ } ] }, + "SimplifiedPlan": { + "Node Type": "Query", + "PlanNodeId": 0, + "PlanNodeType": "Query", + "Plans": [ + { + "Node Type": "ResultSet", + "PlanNodeId": 1, + "PlanNodeType": "ResultSet", + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "10", + "Name": "Limit" + } + ], + "PlanNodeId": 2, + "Plans": [ + { + "Node Type": "TopSort", + "Operators": [ + { + "Limit": "10", + "Name": "TopSort", + "TopSortBy": "argument.Count0" + } + ], + "PlanNodeId": 4, + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Aggregation": "{_yql_agg_0: SUM(state._yql_agg_0,1)}", + "GroupBy": "", + "Name": "Aggregate" + } + ], + "PlanNodeId": 6, + "Plans": [ + { + "Node Type": "TableFullScan", + "Operators": [ + { + "Name": "TableFullScan", + "ReadColumns": [ + "EventTime", + "SearchPhrase", + "UserID" + ], + "ReadRanges": [ + "EventTime (-\u221e, +\u221e)", + "CounterID (-\u221e, +\u221e)", + "EventDate (-\u221e, +\u221e)", + "UserID (-\u221e, +\u221e)", + "WatchID (-\u221e, +\u221e)" + ], + "SsaProgram": { + "Command": [ + { + "Projection": { + "Columns": [ + { + "Id": 5 + }, + { + "Id": 40 + 
}, + { + "Id": 10 + } + ] + } + } + ], + "Version": 4 + }, + "Table": "clickbench/plans/column/hits" + } + ], + "PlanNodeId": 7 + } + ] + } + ] + } + ] + } + ] + } + ] + }, "tables": [ { "name": "/local/clickbench/plans/column/hits", diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-19 b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-19 index b80b9480bc63..b22a43b135ff 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-19 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-19 @@ -87,7 +87,8 @@ } ], "FunctionType": 2, - "KernelIdx": 0 + "KernelIdx": 0, + "YqlOperationId": 11 } } }, @@ -126,6 +127,116 @@ } ] }, + "SimplifiedPlan": { + "Node Type": "Query", + "PlanNodeId": 0, + "PlanNodeType": "Query", + "Plans": [ + { + "Node Type": "ResultSet", + "PlanNodeId": 1, + "PlanNodeType": "ResultSet", + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "1001", + "Name": "Limit" + } + ], + "PlanNodeId": 2, + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "1001", + "Name": "Limit" + } + ], + "PlanNodeId": 4, + "Plans": [ + { + "Node Type": "TableFullScan", + "Operators": [ + { + "Name": "TableFullScan", + "ReadColumns": [ + "UserID" + ], + "ReadLimit": "1001", + "ReadRanges": [ + "EventTime (-\u221e, +\u221e)", + "CounterID (-\u221e, +\u221e)", + "EventDate (-\u221e, +\u221e)", + "UserID (-\u221e, +\u221e)", + "WatchID (-\u221e, +\u221e)" + ], + "SsaProgram": { + "Command": [ + { + "Assign": { + "Column": { + "Id": 106 + }, + "Constant": { + "Int64": 435090932899640449 + } + } + }, + { + "Assign": { + "Column": { + "Id": 107 + }, + "Function": { + "Arguments": [ + { + "Id": 10 + }, + { + "Id": 106 + } + ], + "FunctionType": 2, + "KernelIdx": 0, + "YqlOperationId": 11 + } + } + }, + { + "Filter": { + "Predicate": { + "Id": 
107 + } + } + }, + { + "Projection": { + "Columns": [ + { + "Id": 10 + } + ] + } + } + ], + "Version": 4 + }, + "Table": "clickbench/plans/column/hits" + } + ], + "PlanNodeId": 5 + } + ] + } + ] + } + ] + } + ] + }, "tables": [ { "name": "/local/clickbench/plans/column/hits", diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-2 b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-2 index 2154f5e92c70..2b7f77641b38 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-2 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-2 @@ -16,12 +16,21 @@ "Plans": [ { "CTE Name": "precompute_0_0", - "Node Type": "ConstantExpr", + "Node Type": "ConstantExpr-ConstantExpr", "Operators": [ { - "Inputs": [], + "Inputs": [ + { + "InternalOperatorId": 1 + } + ], "Iterator": "precompute_0_0", "Name": "Iterator" + }, + { + "Inputs": [], + "Member": "precompute_0_0", + "Name": "Member" } ], "PlanNodeId": 6 @@ -172,6 +181,260 @@ } ] }, + "SimplifiedPlan": { + "Node Type": "Query", + "PlanNodeId": 0, + "PlanNodeType": "Query", + "Plans": [ + { + "Node Type": "ResultSet_1", + "PlanNodeId": 1, + "PlanNodeType": "ResultSet", + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Name": "Aggregate" + } + ], + "PlanNodeId": 5, + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "1", + "Name": "Limit" + } + ], + "PlanNodeId": 6, + "Plans": [ + { + "Node Type": "TableFullScan", + "Operators": [ + { + "Name": "TableFullScan", + "ReadColumns": [ + "AdvEngineID", + "ResolutionWidth" + ], + "ReadRanges": [ + "EventTime (-\u221e, +\u221e)", + "CounterID (-\u221e, +\u221e)", + "EventDate (-\u221e, +\u221e)", + "UserID (-\u221e, +\u221e)", + "WatchID (-\u221e, +\u221e)" + ], + "SsaProgram": { + "Command": [ + { + "GroupBy": { + "Aggregates": [ + { + "Column": { + "Id": 
106 + }, + "Function": { + "Arguments": [ + { + "Id": 21 + } + ], + "Id": 5 + } + }, + { + "Column": { + "Id": 107 + }, + "Function": { + "Arguments": [ + { + "Id": 21 + } + ], + "Id": 2 + } + }, + { + "Column": { + "Id": 108 + }, + "Function": { + "Id": 2 + } + }, + { + "Column": { + "Id": 109 + }, + "Function": { + "Arguments": [ + { + "Id": 41 + } + ], + "Id": 5 + } + } + ] + } + }, + { + "Projection": { + "Columns": [ + { + "Id": 107 + }, + { + "Id": 106 + }, + { + "Id": 108 + }, + { + "Id": 109 + } + ] + } + } + ], + "Version": 4 + }, + "Table": "clickbench/plans/column/hits" + } + ], + "PlanNodeId": 8 + } + ] + } + ] + }, + { + "Node Type": "Aggregate", + "Operators": [ + { + "Name": "Aggregate" + } + ], + "PlanNodeId": 10, + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "1", + "Name": "Limit" + } + ], + "PlanNodeId": 11, + "Plans": [ + { + "Node Type": "TableFullScan", + "Operators": [ + { + "Name": "TableFullScan", + "ReadColumns": [ + "AdvEngineID", + "ResolutionWidth" + ], + "ReadRanges": [ + "EventTime (-\u221e, +\u221e)", + "CounterID (-\u221e, +\u221e)", + "EventDate (-\u221e, +\u221e)", + "UserID (-\u221e, +\u221e)", + "WatchID (-\u221e, +\u221e)" + ], + "SsaProgram": { + "Command": [ + { + "GroupBy": { + "Aggregates": [ + { + "Column": { + "Id": 106 + }, + "Function": { + "Arguments": [ + { + "Id": 21 + } + ], + "Id": 5 + } + }, + { + "Column": { + "Id": 107 + }, + "Function": { + "Arguments": [ + { + "Id": 21 + } + ], + "Id": 2 + } + }, + { + "Column": { + "Id": 108 + }, + "Function": { + "Id": 2 + } + }, + { + "Column": { + "Id": 109 + }, + "Function": { + "Arguments": [ + { + "Id": 41 + } + ], + "Id": 5 + } + } + ] + } + }, + { + "Projection": { + "Columns": [ + { + "Id": 107 + }, + { + "Id": 106 + }, + { + "Id": 108 + }, + { + "Id": 109 + } + ] + } + } + ], + "Version": 4 + }, + "Table": "clickbench/plans/column/hits" + } + ], + "PlanNodeId": 13 + } + ] + } + ] + } + ] + } + ] + }, "tables": [ { "name": 
"/local/clickbench/plans/column/hits", diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-20 b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-20 index 8dc1e19f8a34..ad9f9f48b03e 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-20 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-20 @@ -105,7 +105,8 @@ } ], "FunctionType": 2, - "KernelIdx": 0 + "KernelIdx": 0, + "YqlOperationId": 10 } } }, @@ -159,6 +160,128 @@ } ] }, + "SimplifiedPlan": { + "Node Type": "Query", + "PlanNodeId": 0, + "PlanNodeType": "Query", + "Plans": [ + { + "Node Type": "ResultSet_1", + "PlanNodeId": 1, + "PlanNodeType": "ResultSet", + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Name": "Aggregate" + } + ], + "PlanNodeId": 4, + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "1", + "Name": "Limit" + } + ], + "PlanNodeId": 5, + "Plans": [ + { + "Node Type": "TableFullScan", + "Operators": [ + { + "Name": "TableFullScan", + "ReadColumns": [ + "URL" + ], + "ReadRanges": [ + "EventTime (-\u221e, +\u221e)", + "CounterID (-\u221e, +\u221e)", + "EventDate (-\u221e, +\u221e)", + "UserID (-\u221e, +\u221e)", + "WatchID (-\u221e, +\u221e)" + ], + "SsaProgram": { + "Command": [ + { + "Assign": { + "Column": { + "Id": 106 + }, + "Constant": { + "Bytes": "google" + } + } + }, + { + "Assign": { + "Column": { + "Id": 107 + }, + "Function": { + "Arguments": [ + { + "Id": 14 + }, + { + "Id": 106 + } + ], + "FunctionType": 2, + "KernelIdx": 0, + "YqlOperationId": 10 + } + } + }, + { + "Filter": { + "Predicate": { + "Id": 107 + } + } + }, + { + "GroupBy": { + "Aggregates": [ + { + "Column": { + "Id": 108 + }, + "Function": { + "Id": 2 + } + } + ] + } + }, + { + "Projection": { + "Columns": [ + { + "Id": 108 + } + ] + } + } + ], + "Version": 4 + }, + "Table": 
"clickbench/plans/column/hits" + } + ], + "PlanNodeId": 7 + } + ] + } + ] + } + ] + } + ] + }, "tables": [ { "name": "/local/clickbench/plans/column/hits", diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-21 b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-21 index 602516439293..203b0afaf361 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-21 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-21 @@ -112,7 +112,8 @@ } ], "FunctionType": 2, - "KernelIdx": 0 + "KernelIdx": 0, + "YqlOperationId": 12 } } }, @@ -151,7 +152,8 @@ } ], "FunctionType": 2, - "KernelIdx": 1 + "KernelIdx": 1, + "YqlOperationId": 10 } } }, @@ -200,6 +202,173 @@ } ] }, + "SimplifiedPlan": { + "Node Type": "Query", + "PlanNodeId": 0, + "PlanNodeType": "Query", + "Plans": [ + { + "Node Type": "ResultSet", + "PlanNodeId": 1, + "PlanNodeType": "ResultSet", + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "10", + "Name": "Limit" + } + ], + "PlanNodeId": 2, + "Plans": [ + { + "Node Type": "TopSort", + "Operators": [ + { + "Limit": "10", + "Name": "TopSort", + "TopSortBy": "argument.Count0" + } + ], + "PlanNodeId": 4, + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Aggregation": "{_yql_agg_0: SUM(state._yql_agg_0,1),_yql_agg_1: MIN(item.URL,state._yql_agg_1)}", + "GroupBy": "item.SearchPhrase", + "Name": "Aggregate" + } + ], + "PlanNodeId": 6, + "Plans": [ + { + "Node Type": "TableFullScan", + "Operators": [ + { + "Name": "TableFullScan", + "ReadColumns": [ + "SearchPhrase", + "URL" + ], + "ReadRanges": [ + "EventTime (-\u221e, +\u221e)", + "CounterID (-\u221e, +\u221e)", + "EventDate (-\u221e, +\u221e)", + "UserID (-\u221e, +\u221e)", + "WatchID (-\u221e, +\u221e)" + ], + "SsaProgram": { + "Command": [ + { + "Assign": { + "Column": { + "Id": 106 
+ }, + "Constant": { + "Bytes": "" + } + } + }, + { + "Assign": { + "Column": { + "Id": 107 + }, + "Function": { + "Arguments": [ + { + "Id": 40 + }, + { + "Id": 106 + } + ], + "FunctionType": 2, + "KernelIdx": 0, + "YqlOperationId": 12 + } + } + }, + { + "Filter": { + "Predicate": { + "Id": 107 + } + } + }, + { + "Projection": {} + }, + { + "Assign": { + "Column": { + "Id": 108 + }, + "Constant": { + "Bytes": "google" + } + } + }, + { + "Assign": { + "Column": { + "Id": 109 + }, + "Function": { + "Arguments": [ + { + "Id": 14 + }, + { + "Id": 108 + } + ], + "FunctionType": 2, + "KernelIdx": 1, + "YqlOperationId": 10 + } + } + }, + { + "Filter": { + "Predicate": { + "Id": 109 + } + } + }, + { + "Projection": { + "Columns": [ + { + "Id": 40 + }, + { + "Id": 14 + } + ] + } + } + ], + "Version": 4 + }, + "Table": "clickbench/plans/column/hits" + } + ], + "PlanNodeId": 7 + } + ] + } + ] + } + ] + } + ] + } + ] + }, "tables": [ { "name": "/local/clickbench/plans/column/hits", diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-22 b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-22 index a46a508f1624..f4b956138363 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-22 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-22 @@ -136,7 +136,8 @@ } ], "FunctionType": 2, - "KernelIdx": 0 + "KernelIdx": 0, + "YqlOperationId": 12 } } }, @@ -175,7 +176,8 @@ } ], "FunctionType": 2, - "KernelIdx": 1 + "KernelIdx": 1, + "YqlOperationId": 10 } } }, @@ -204,7 +206,8 @@ } ], "FunctionType": 2, - "KernelIdx": 2 + "KernelIdx": 2, + "YqlOperationId": 10 } } }, @@ -239,7 +242,8 @@ } ], "FunctionType": 2, - "KernelIdx": 4 + "KernelIdx": 4, + "YqlOperationId": 0 } } }, @@ -370,7 +374,8 @@ } ], "FunctionType": 2, - "KernelIdx": 0 + "KernelIdx": 0, + "YqlOperationId": 12 } } }, @@ -409,7 
+414,8 @@ } ], "FunctionType": 2, - "KernelIdx": 1 + "KernelIdx": 1, + "YqlOperationId": 10 } } }, @@ -438,7 +444,8 @@ } ], "FunctionType": 2, - "KernelIdx": 2 + "KernelIdx": 2, + "YqlOperationId": 10 } } }, @@ -473,7 +480,8 @@ } ], "FunctionType": 2, - "KernelIdx": 4 + "KernelIdx": 4, + "YqlOperationId": 0 } } }, @@ -536,6 +544,473 @@ } ] }, + "SimplifiedPlan": { + "Node Type": "Query", + "PlanNodeId": 0, + "PlanNodeType": "Query", + "Plans": [ + { + "Node Type": "ResultSet", + "PlanNodeId": 1, + "PlanNodeType": "ResultSet", + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "10", + "Name": "Limit" + } + ], + "PlanNodeId": 2, + "Plans": [ + { + "Node Type": "TopSort", + "Operators": [ + { + "Limit": "10", + "Name": "TopSort", + "TopSortBy": "argument.Count0" + } + ], + "PlanNodeId": 4, + "Plans": [ + { + "Node Type": "Union", + "Operators": [ + { + "Name": "Union" + } + ], + "PlanNodeId": 6, + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Aggregation": "{_yql_agg_0: SUM(state._yql_agg_0,1),_yql_agg_2: MIN(item.URL,state._yql_agg_2),_yql_agg_3: MIN(item.Title,state._yql_agg_3)}", + "GroupBy": "item.SearchPhrase", + "Name": "Aggregate" + } + ], + "PlanNodeId": 8, + "Plans": [ + { + "Node Type": "TableFullScan", + "Operators": [ + { + "Name": "TableFullScan", + "ReadColumns": [ + "SearchPhrase", + "Title", + "URL", + "UserID" + ], + "ReadRanges": [ + "EventTime (-\u221e, +\u221e)", + "CounterID (-\u221e, +\u221e)", + "EventDate (-\u221e, +\u221e)", + "UserID (-\u221e, +\u221e)", + "WatchID (-\u221e, +\u221e)" + ], + "SsaProgram": { + "Command": [ + { + "Assign": { + "Column": { + "Id": 106 + }, + "Constant": { + "Bytes": "" + } + } + }, + { + "Assign": { + "Column": { + "Id": 107 + }, + "Function": { + "Arguments": [ + { + "Id": 40 + }, + { + "Id": 106 + } + ], + "FunctionType": 2, + "KernelIdx": 0, + "YqlOperationId": 12 + } + } + }, + { + "Filter": { + "Predicate": { + "Id": 107 + } + } + }, + { + "Projection": {} + }, + { 
+ "Assign": { + "Column": { + "Id": 108 + }, + "Constant": { + "Bytes": "Google" + } + } + }, + { + "Assign": { + "Column": { + "Id": 109 + }, + "Function": { + "Arguments": [ + { + "Id": 3 + }, + { + "Id": 108 + } + ], + "FunctionType": 2, + "KernelIdx": 1, + "YqlOperationId": 10 + } + } + }, + { + "Assign": { + "Column": { + "Id": 110 + }, + "Constant": { + "Bytes": ".google." + } + } + }, + { + "Assign": { + "Column": { + "Id": 111 + }, + "Function": { + "Arguments": [ + { + "Id": 14 + }, + { + "Id": 110 + } + ], + "FunctionType": 2, + "KernelIdx": 2, + "YqlOperationId": 10 + } + } + }, + { + "Assign": { + "Column": { + "Id": 112 + }, + "Function": { + "Arguments": [ + { + "Id": 111 + } + ], + "FunctionType": 2, + "KernelIdx": 3 + } + } + }, + { + "Assign": { + "Column": { + "Id": 113 + }, + "Function": { + "Arguments": [ + { + "Id": 109 + }, + { + "Id": 112 + } + ], + "FunctionType": 2, + "KernelIdx": 4, + "YqlOperationId": 0 + } + } + }, + { + "Filter": { + "Predicate": { + "Id": 113 + } + } + }, + { + "Projection": { + "Columns": [ + { + "Id": 40 + }, + { + "Id": 3 + }, + { + "Id": 14 + }, + { + "Id": 10 + } + ] + } + } + ], + "Version": 4 + }, + "Table": "clickbench/plans/column/hits" + } + ], + "PlanNodeId": 9 + } + ] + }, + { + "Node Type": "Aggregate", + "Operators": [ + { + "Aggregation": "{_yql_agg_1: Inc(state._yql_agg_1)}", + "GroupBy": "item.SearchPhrase", + "Name": "Aggregate" + } + ], + "PlanNodeId": 11, + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Aggregation": "state", + "GroupBy": "", + "Name": "Aggregate" + } + ], + "PlanNodeId": 13, + "Plans": [ + { + "Node Type": "TableFullScan", + "Operators": [ + { + "Name": "TableFullScan", + "ReadColumns": [ + "SearchPhrase", + "Title", + "URL", + "UserID" + ], + "ReadRanges": [ + "EventTime (-\u221e, +\u221e)", + "CounterID (-\u221e, +\u221e)", + "EventDate (-\u221e, +\u221e)", + "UserID (-\u221e, +\u221e)", + "WatchID (-\u221e, +\u221e)" + ], + "SsaProgram": { + "Command": [ + { 
+ "Assign": { + "Column": { + "Id": 106 + }, + "Constant": { + "Bytes": "" + } + } + }, + { + "Assign": { + "Column": { + "Id": 107 + }, + "Function": { + "Arguments": [ + { + "Id": 40 + }, + { + "Id": 106 + } + ], + "FunctionType": 2, + "KernelIdx": 0, + "YqlOperationId": 12 + } + } + }, + { + "Filter": { + "Predicate": { + "Id": 107 + } + } + }, + { + "Projection": {} + }, + { + "Assign": { + "Column": { + "Id": 108 + }, + "Constant": { + "Bytes": "Google" + } + } + }, + { + "Assign": { + "Column": { + "Id": 109 + }, + "Function": { + "Arguments": [ + { + "Id": 3 + }, + { + "Id": 108 + } + ], + "FunctionType": 2, + "KernelIdx": 1, + "YqlOperationId": 10 + } + } + }, + { + "Assign": { + "Column": { + "Id": 110 + }, + "Constant": { + "Bytes": ".google." + } + } + }, + { + "Assign": { + "Column": { + "Id": 111 + }, + "Function": { + "Arguments": [ + { + "Id": 14 + }, + { + "Id": 110 + } + ], + "FunctionType": 2, + "KernelIdx": 2, + "YqlOperationId": 10 + } + } + }, + { + "Assign": { + "Column": { + "Id": 112 + }, + "Function": { + "Arguments": [ + { + "Id": 111 + } + ], + "FunctionType": 2, + "KernelIdx": 3 + } + } + }, + { + "Assign": { + "Column": { + "Id": 113 + }, + "Function": { + "Arguments": [ + { + "Id": 109 + }, + { + "Id": 112 + } + ], + "FunctionType": 2, + "KernelIdx": 4, + "YqlOperationId": 0 + } + } + }, + { + "Filter": { + "Predicate": { + "Id": 113 + } + } + }, + { + "Projection": { + "Columns": [ + { + "Id": 40 + }, + { + "Id": 3 + }, + { + "Id": 14 + }, + { + "Id": 10 + } + ] + } + } + ], + "Version": 4 + }, + "Table": "clickbench/plans/column/hits" + } + ], + "PlanNodeId": 14 + } + ] + } + ] + } + ] + } + ] + } + ] + } + ] + } + ] + }, "tables": [ { "name": "/local/clickbench/plans/column/hits", diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-23 b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-23 index d357d2739fb8..69acb20b12ce 100644 --- 
a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-23 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-23 @@ -192,7 +192,8 @@ } ], "FunctionType": 2, - "KernelIdx": 0 + "KernelIdx": 0, + "YqlOperationId": 10 } } }, @@ -546,6 +547,533 @@ } ] }, + "SimplifiedPlan": { + "Node Type": "Query", + "PlanNodeId": 0, + "PlanNodeType": "Query", + "Plans": [ + { + "Node Type": "ResultSet", + "PlanNodeId": 1, + "PlanNodeType": "ResultSet", + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "10", + "Name": "Limit" + } + ], + "PlanNodeId": 2, + "Plans": [ + { + "Node Type": "TopSort", + "Operators": [ + { + "Limit": "10", + "Name": "TopSort", + "TopSortBy": "$4.EventTime" + } + ], + "PlanNodeId": 4, + "Plans": [ + { + "Node Type": "TableFullScan", + "Operators": [ + { + "Name": "TableFullScan", + "ReadColumns": [ + "AdvEngineID", + "Age", + "BrowserCountry", + "BrowserLanguage", + "CLID", + "ClientEventTime", + "ClientIP", + "ClientTimeZone", + "CodeVersion", + "ConnectTiming", + "CookieEnable", + "CounterClass", + "CounterID", + "DNSTiming", + "DontCountHits", + "EventDate", + "EventTime", + "FUniqID", + "FetchTiming", + "FlashMajor", + "FlashMinor", + "FlashMinor2", + "FromTag", + "GoodEvent", + "HID", + "HTTPError", + "HasGCLID", + "HistoryLength", + "HitColor", + "IPNetworkID", + "Income", + "Interests", + "IsArtifical", + "IsDownload", + "IsEvent", + "IsLink", + "IsMobile", + "IsNotBounce", + "IsOldCounter", + "IsParameter", + "IsRefresh", + "JavaEnable", + "JavascriptEnable", + "LocalEventTime", + "MobilePhone", + "MobilePhoneModel", + "NetMajor", + "NetMinor", + "OS", + "OpenerName", + "OpenstatAdID", + "OpenstatCampaignID", + "OpenstatServiceName", + "OpenstatSourceID", + "OriginalURL", + "PageCharset", + "ParamCurrency", + "ParamCurrencyID", + "ParamOrderID", + "ParamPrice", + "Params", + "Referer", + "RefererCategoryID", + "RefererHash", + 
"RefererRegionID", + "RegionID", + "RemoteIP", + "ResolutionDepth", + "ResolutionHeight", + "ResolutionWidth", + "ResponseEndTiming", + "ResponseStartTiming", + "Robotness", + "SearchEngineID", + "SearchPhrase", + "SendTiming", + "Sex", + "SilverlightVersion1", + "SilverlightVersion2", + "SilverlightVersion3", + "SilverlightVersion4", + "SocialAction", + "SocialNetwork", + "SocialSourceNetworkID", + "SocialSourcePage", + "Title", + "TraficSourceID", + "URL", + "URLCategoryID", + "URLHash", + "URLRegionID", + "UTMCampaign", + "UTMContent", + "UTMMedium", + "UTMSource", + "UTMTerm", + "UserAgent", + "UserAgentMajor", + "UserAgentMinor", + "UserID", + "WatchID", + "WindowClientHeight", + "WindowClientWidth", + "WindowName", + "WithHash" + ], + "ReadLimit": "10", + "ReadRanges": [ + "EventTime (-\u221e, +\u221e)", + "CounterID (-\u221e, +\u221e)", + "EventDate (-\u221e, +\u221e)", + "UserID (-\u221e, +\u221e)", + "WatchID (-\u221e, +\u221e)" + ], + "SsaProgram": { + "Command": [ + { + "Assign": { + "Column": { + "Id": 106 + }, + "Constant": { + "Bytes": "google" + } + } + }, + { + "Assign": { + "Column": { + "Id": 107 + }, + "Function": { + "Arguments": [ + { + "Id": 14 + }, + { + "Id": 106 + } + ], + "FunctionType": 2, + "KernelIdx": 0, + "YqlOperationId": 10 + } + } + }, + { + "Filter": { + "Predicate": { + "Id": 107 + } + } + }, + { + "Projection": { + "Columns": [ + { + "Id": 41 + }, + { + "Id": 66 + }, + { + "Id": 76 + }, + { + "Id": 75 + }, + { + "Id": 105 + }, + { + "Id": 46 + }, + { + "Id": 8 + }, + { + "Id": 45 + }, + { + "Id": 52 + }, + { + "Id": 82 + }, + { + "Id": 31 + }, + { + "Id": 11 + }, + { + "Id": 7 + }, + { + "Id": 81 + }, + { + "Id": 62 + }, + { + "Id": 6 + }, + { + "Id": 5 + }, + { + "Id": 56 + }, + { + "Id": 85 + }, + { + "Id": 24 + }, + { + "Id": 25 + }, + { + "Id": 26 + }, + { + "Id": 101 + }, + { + "Id": 4 + }, + { + "Id": 58 + }, + { + "Id": 79 + }, + { + "Id": 102 + }, + { + "Id": 74 + }, + { + "Id": 64 + }, + { + "Id": 37 + }, + { + "Id": 68 
+ }, + { + "Id": 69 + }, + { + "Id": 42 + }, + { + "Id": 54 + }, + { + "Id": 60 + }, + { + "Id": 53 + }, + { + "Id": 33 + }, + { + "Id": 55 + }, + { + "Id": 59 + }, + { + "Id": 61 + }, + { + "Id": 16 + }, + { + "Id": 2 + }, + { + "Id": 32 + }, + { + "Id": 65 + }, + { + "Id": 34 + }, + { + "Id": 35 + }, + { + "Id": 27 + }, + { + "Id": 28 + }, + { + "Id": 12 + }, + { + "Id": 73 + }, + { + "Id": 94 + }, + { + "Id": 93 + }, + { + "Id": 92 + }, + { + "Id": 95 + }, + { + "Id": 57 + }, + { + "Id": 51 + }, + { + "Id": 90 + }, + { + "Id": 91 + }, + { + "Id": 89 + }, + { + "Id": 88 + }, + { + "Id": 36 + }, + { + "Id": 15 + }, + { + "Id": 17 + }, + { + "Id": 103 + }, + { + "Id": 18 + }, + { + "Id": 9 + }, + { + "Id": 71 + }, + { + "Id": 23 + }, + { + "Id": 22 + }, + { + "Id": 21 + }, + { + "Id": 84 + }, + { + "Id": 83 + }, + { + "Id": 70 + }, + { + "Id": 39 + }, + { + "Id": 40 + }, + { + "Id": 80 + }, + { + "Id": 67 + }, + { + "Id": 47 + }, + { + "Id": 48 + }, + { + "Id": 49 + }, + { + "Id": 50 + }, + { + "Id": 78 + }, + { + "Id": 77 + }, + { + "Id": 86 + }, + { + "Id": 87 + }, + { + "Id": 3 + }, + { + "Id": 38 + }, + { + "Id": 14 + }, + { + "Id": 19 + }, + { + "Id": 104 + }, + { + "Id": 20 + }, + { + "Id": 98 + }, + { + "Id": 99 + }, + { + "Id": 97 + }, + { + "Id": 96 + }, + { + "Id": 100 + }, + { + "Id": 13 + }, + { + "Id": 29 + }, + { + "Id": 30 + }, + { + "Id": 10 + }, + { + "Id": 1 + }, + { + "Id": 44 + }, + { + "Id": 43 + }, + { + "Id": 72 + }, + { + "Id": 63 + } + ] + } + } + ], + "Version": 4 + }, + "Table": "clickbench/plans/column/hits" + } + ], + "PlanNodeId": 5 + } + ] + } + ] + } + ] + } + ] + }, "tables": [ { "name": "/local/clickbench/plans/column/hits", diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-24 b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-24 index efd72a470d02..8fa719271a8b 100644 --- 
a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-24 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-24 @@ -89,7 +89,8 @@ } ], "FunctionType": 2, - "KernelIdx": 0 + "KernelIdx": 0, + "YqlOperationId": 12 } } }, @@ -134,6 +135,121 @@ } ] }, + "SimplifiedPlan": { + "Node Type": "Query", + "PlanNodeId": 0, + "PlanNodeType": "Query", + "Plans": [ + { + "Node Type": "ResultSet", + "PlanNodeId": 1, + "PlanNodeType": "ResultSet", + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "10", + "Name": "Limit" + } + ], + "PlanNodeId": 2, + "Plans": [ + { + "Node Type": "TopSort", + "Operators": [ + { + "Limit": "10", + "Name": "TopSort", + "TopSortBy": "$4.EventTime" + } + ], + "PlanNodeId": 4, + "Plans": [ + { + "Node Type": "TableFullScan", + "Operators": [ + { + "Name": "TableFullScan", + "ReadColumns": [ + "EventTime", + "SearchPhrase" + ], + "ReadLimit": "10", + "ReadRanges": [ + "EventTime (-\u221e, +\u221e)", + "CounterID (-\u221e, +\u221e)", + "EventDate (-\u221e, +\u221e)", + "UserID (-\u221e, +\u221e)", + "WatchID (-\u221e, +\u221e)" + ], + "SsaProgram": { + "Command": [ + { + "Assign": { + "Column": { + "Id": 106 + }, + "Constant": { + "Bytes": "" + } + } + }, + { + "Assign": { + "Column": { + "Id": 107 + }, + "Function": { + "Arguments": [ + { + "Id": 40 + }, + { + "Id": 106 + } + ], + "FunctionType": 2, + "KernelIdx": 0, + "YqlOperationId": 12 + } + } + }, + { + "Filter": { + "Predicate": { + "Id": 107 + } + } + }, + { + "Projection": { + "Columns": [ + { + "Id": 5 + }, + { + "Id": 40 + } + ] + } + } + ], + "Version": 4 + }, + "Table": "clickbench/plans/column/hits" + } + ], + "PlanNodeId": 5 + } + ] + } + ] + } + ] + } + ] + }, "tables": [ { "name": "/local/clickbench/plans/column/hits", diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-25 
b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-25 index 5252f85c9b41..09bca56e623d 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-25 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-25 @@ -87,7 +87,8 @@ } ], "FunctionType": 2, - "KernelIdx": 0 + "KernelIdx": 0, + "YqlOperationId": 12 } } }, @@ -129,6 +130,116 @@ } ] }, + "SimplifiedPlan": { + "Node Type": "Query", + "PlanNodeId": 0, + "PlanNodeType": "Query", + "Plans": [ + { + "Node Type": "ResultSet", + "PlanNodeId": 1, + "PlanNodeType": "ResultSet", + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "10", + "Name": "Limit" + } + ], + "PlanNodeId": 2, + "Plans": [ + { + "Node Type": "TopSort", + "Operators": [ + { + "Limit": "10", + "Name": "TopSort", + "TopSortBy": "$5.SearchPhrase" + } + ], + "PlanNodeId": 4, + "Plans": [ + { + "Node Type": "TableFullScan", + "Operators": [ + { + "Name": "TableFullScan", + "ReadColumns": [ + "SearchPhrase" + ], + "ReadRanges": [ + "EventTime (-\u221e, +\u221e)", + "CounterID (-\u221e, +\u221e)", + "EventDate (-\u221e, +\u221e)", + "UserID (-\u221e, +\u221e)", + "WatchID (-\u221e, +\u221e)" + ], + "SsaProgram": { + "Command": [ + { + "Assign": { + "Column": { + "Id": 106 + }, + "Constant": { + "Bytes": "" + } + } + }, + { + "Assign": { + "Column": { + "Id": 107 + }, + "Function": { + "Arguments": [ + { + "Id": 40 + }, + { + "Id": 106 + } + ], + "FunctionType": 2, + "KernelIdx": 0, + "YqlOperationId": 12 + } + } + }, + { + "Filter": { + "Predicate": { + "Id": 107 + } + } + }, + { + "Projection": { + "Columns": [ + { + "Id": 40 + } + ] + } + } + ], + "Version": 4 + }, + "Table": "clickbench/plans/column/hits" + } + ], + "PlanNodeId": 5 + } + ] + } + ] + } + ] + } + ] + }, "tables": [ { "name": "/local/clickbench/plans/column/hits", diff --git 
a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-26 b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-26 index 4047c1c399d1..80c2eea94f99 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-26 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-26 @@ -88,7 +88,8 @@ } ], "FunctionType": 2, - "KernelIdx": 0 + "KernelIdx": 0, + "YqlOperationId": 12 } } }, @@ -134,6 +135,120 @@ } ] }, + "SimplifiedPlan": { + "Node Type": "Query", + "PlanNodeId": 0, + "PlanNodeType": "Query", + "Plans": [ + { + "Node Type": "ResultSet", + "PlanNodeId": 1, + "PlanNodeType": "ResultSet", + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "10", + "Name": "Limit" + } + ], + "PlanNodeId": 2, + "Plans": [ + { + "Node Type": "TopSort", + "Operators": [ + { + "Limit": "10", + "Name": "TopSort", + "TopSortBy": "" + } + ], + "PlanNodeId": 4, + "Plans": [ + { + "Node Type": "TableFullScan", + "Operators": [ + { + "Name": "TableFullScan", + "ReadColumns": [ + "EventTime", + "SearchPhrase" + ], + "ReadRanges": [ + "EventTime (-\u221e, +\u221e)", + "CounterID (-\u221e, +\u221e)", + "EventDate (-\u221e, +\u221e)", + "UserID (-\u221e, +\u221e)", + "WatchID (-\u221e, +\u221e)" + ], + "SsaProgram": { + "Command": [ + { + "Assign": { + "Column": { + "Id": 106 + }, + "Constant": { + "Bytes": "" + } + } + }, + { + "Assign": { + "Column": { + "Id": 107 + }, + "Function": { + "Arguments": [ + { + "Id": 40 + }, + { + "Id": 106 + } + ], + "FunctionType": 2, + "KernelIdx": 0, + "YqlOperationId": 12 + } + } + }, + { + "Filter": { + "Predicate": { + "Id": 107 + } + } + }, + { + "Projection": { + "Columns": [ + { + "Id": 5 + }, + { + "Id": 40 + } + ] + } + } + ], + "Version": 4 + }, + "Table": "clickbench/plans/column/hits" + } + ], + "PlanNodeId": 5 + } + ] + } + ] + } + ] + } + ] + }, "tables": 
[ { "name": "/local/clickbench/plans/column/hits", diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-27 b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-27 index 848218e72343..990334156a71 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-27 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-27 @@ -121,7 +121,8 @@ } ], "FunctionType": 2, - "KernelIdx": 0 + "KernelIdx": 0, + "YqlOperationId": 12 } } }, @@ -170,6 +171,145 @@ } ] }, + "SimplifiedPlan": { + "Node Type": "Query", + "PlanNodeId": 0, + "PlanNodeType": "Query", + "Plans": [ + { + "Node Type": "ResultSet", + "PlanNodeId": 1, + "PlanNodeType": "ResultSet", + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "25", + "Name": "Limit" + } + ], + "PlanNodeId": 2, + "Plans": [ + { + "Node Type": "TopSort", + "Operators": [ + { + "Limit": "25", + "Name": "TopSort", + "TopSortBy": "$13.l" + } + ], + "PlanNodeId": 4, + "Plans": [ + { + "Node Type": "Filter", + "Operators": [ + { + "Name": "Filter", + "Predicate": "item.Count0 > 100000" + } + ], + "PlanNodeId": 5, + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Aggregation": "{_yql_agg_1: SUM(state._yql_agg_1,1)}", + "GroupBy": "item.CounterID", + "Name": "Aggregate" + } + ], + "PlanNodeId": 7, + "Plans": [ + { + "Node Type": "TableFullScan", + "Operators": [ + { + "Name": "TableFullScan", + "ReadColumns": [ + "CounterID", + "URL" + ], + "ReadRanges": [ + "EventTime (-\u221e, +\u221e)", + "CounterID (-\u221e, +\u221e)", + "EventDate (-\u221e, +\u221e)", + "UserID (-\u221e, +\u221e)", + "WatchID (-\u221e, +\u221e)" + ], + "SsaProgram": { + "Command": [ + { + "Assign": { + "Column": { + "Id": 106 + }, + "Constant": { + "Bytes": "" + } + } + }, + { + "Assign": { + "Column": { + "Id": 107 + }, + "Function": { + 
"Arguments": [ + { + "Id": 14 + }, + { + "Id": 106 + } + ], + "FunctionType": 2, + "KernelIdx": 0, + "YqlOperationId": 12 + } + } + }, + { + "Filter": { + "Predicate": { + "Id": 107 + } + } + }, + { + "Projection": { + "Columns": [ + { + "Id": 7 + }, + { + "Id": 14 + } + ] + } + } + ], + "Version": 4 + }, + "Table": "clickbench/plans/column/hits" + } + ], + "PlanNodeId": 8 + } + ] + } + ] + } + ] + } + ] + } + ] + } + ] + }, "tables": [ { "name": "/local/clickbench/plans/column/hits", diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-28 b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-28 index 491ede2798c8..c51e844a7d55 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-28 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-28 @@ -120,7 +120,8 @@ } ], "FunctionType": 2, - "KernelIdx": 0 + "KernelIdx": 0, + "YqlOperationId": 12 } } }, @@ -166,6 +167,141 @@ } ] }, + "SimplifiedPlan": { + "Node Type": "Query", + "PlanNodeId": 0, + "PlanNodeType": "Query", + "Plans": [ + { + "Node Type": "ResultSet", + "PlanNodeId": 1, + "PlanNodeType": "ResultSet", + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "25", + "Name": "Limit" + } + ], + "PlanNodeId": 2, + "Plans": [ + { + "Node Type": "TopSort", + "Operators": [ + { + "Limit": "25", + "Name": "TopSort", + "TopSortBy": "$23.l" + } + ], + "PlanNodeId": 4, + "Plans": [ + { + "Node Type": "Filter", + "Operators": [ + { + "Name": "Filter", + "Predicate": "item.Count0 > 100000" + } + ], + "PlanNodeId": 5, + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Aggregation": "{_yql_agg_1: SUM(state._yql_agg_1,1),_yql_agg_2: MIN(item.Referer,state._yql_agg_2)}", + "GroupBy": "item.key", + "Name": "Aggregate" + } + ], + "PlanNodeId": 7, + "Plans": [ + { + "Node Type": "TableFullScan", + 
"Operators": [ + { + "Name": "TableFullScan", + "ReadColumns": [ + "Referer" + ], + "ReadRanges": [ + "EventTime (-\u221e, +\u221e)", + "CounterID (-\u221e, +\u221e)", + "EventDate (-\u221e, +\u221e)", + "UserID (-\u221e, +\u221e)", + "WatchID (-\u221e, +\u221e)" + ], + "SsaProgram": { + "Command": [ + { + "Assign": { + "Column": { + "Id": 106 + }, + "Constant": { + "Bytes": "" + } + } + }, + { + "Assign": { + "Column": { + "Id": 107 + }, + "Function": { + "Arguments": [ + { + "Id": 15 + }, + { + "Id": 106 + } + ], + "FunctionType": 2, + "KernelIdx": 0, + "YqlOperationId": 12 + } + } + }, + { + "Filter": { + "Predicate": { + "Id": 107 + } + } + }, + { + "Projection": { + "Columns": [ + { + "Id": 15 + } + ] + } + } + ], + "Version": 4 + }, + "Table": "clickbench/plans/column/hits" + } + ], + "PlanNodeId": 8 + } + ] + } + ] + } + ] + } + ] + } + ] + } + ] + }, "tables": [ { "name": "/local/clickbench/plans/column/hits", diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-29 b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-29 index c944f90372d8..0037dced2869 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-29 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-29 @@ -16,12 +16,21 @@ "Plans": [ { "CTE Name": "precompute_0_0", - "Node Type": "ConstantExpr", + "Node Type": "ConstantExpr-ConstantExpr", "Operators": [ { - "Inputs": [], + "Inputs": [ + { + "InternalOperatorId": 1 + } + ], "Iterator": "precompute_0_0", "Name": "Iterator" + }, + { + "Inputs": [], + "Member": "precompute_0_0", + "Name": "Member" } ], "PlanNodeId": 6 @@ -117,6 +126,156 @@ } ] }, + "SimplifiedPlan": { + "Node Type": "Query", + "PlanNodeId": 0, + "PlanNodeType": "Query", + "Plans": [ + { + "Node Type": "ResultSet_1", + "PlanNodeId": 1, + "PlanNodeType": "ResultSet", + "Plans": [ + { + 
"Node Type": "Aggregate", + "Operators": [ + { + "Name": "Aggregate" + } + ], + "PlanNodeId": 5, + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "1", + "Name": "Limit" + } + ], + "PlanNodeId": 6, + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Name": "Aggregate" + } + ], + "PlanNodeId": 8, + "Plans": [ + { + "Node Type": "TableFullScan", + "Operators": [ + { + "Name": "TableFullScan", + "ReadColumns": [ + "ResolutionWidth" + ], + "ReadRanges": [ + "EventTime (-\u221e, +\u221e)", + "CounterID (-\u221e, +\u221e)", + "EventDate (-\u221e, +\u221e)", + "UserID (-\u221e, +\u221e)", + "WatchID (-\u221e, +\u221e)" + ], + "SsaProgram": { + "Command": [ + { + "Projection": { + "Columns": [ + { + "Id": 21 + } + ] + } + } + ], + "Version": 4 + }, + "Table": "clickbench/plans/column/hits" + } + ], + "PlanNodeId": 9 + } + ] + } + ] + } + ] + }, + { + "Node Type": "Aggregate", + "Operators": [ + { + "Name": "Aggregate" + } + ], + "PlanNodeId": 11, + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "1", + "Name": "Limit" + } + ], + "PlanNodeId": 12, + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Name": "Aggregate" + } + ], + "PlanNodeId": 14, + "Plans": [ + { + "Node Type": "TableFullScan", + "Operators": [ + { + "Name": "TableFullScan", + "ReadColumns": [ + "ResolutionWidth" + ], + "ReadRanges": [ + "EventTime (-\u221e, +\u221e)", + "CounterID (-\u221e, +\u221e)", + "EventDate (-\u221e, +\u221e)", + "UserID (-\u221e, +\u221e)", + "WatchID (-\u221e, +\u221e)" + ], + "SsaProgram": { + "Command": [ + { + "Projection": { + "Columns": [ + { + "Id": 21 + } + ] + } + } + ], + "Version": 4 + }, + "Table": "clickbench/plans/column/hits" + } + ], + "PlanNodeId": 15 + } + ] + } + ] + } + ] + } + ] + } + ] + }, "tables": [ { "name": "/local/clickbench/plans/column/hits", diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-3 
b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-3 index febbdfb74f86..6aadebfb50ed 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-3 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-3 @@ -144,6 +144,112 @@ } ] }, + "SimplifiedPlan": { + "Node Type": "Query", + "PlanNodeId": 0, + "PlanNodeType": "Query", + "Plans": [ + { + "Node Type": "ResultSet_1", + "PlanNodeId": 1, + "PlanNodeType": "ResultSet", + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Name": "Aggregate" + } + ], + "PlanNodeId": 4, + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "1", + "Name": "Limit" + } + ], + "PlanNodeId": 5, + "Plans": [ + { + "Node Type": "TableFullScan", + "Operators": [ + { + "Name": "TableFullScan", + "ReadColumns": [ + "UserID" + ], + "ReadRanges": [ + "EventTime (-\u221e, +\u221e)", + "CounterID (-\u221e, +\u221e)", + "EventDate (-\u221e, +\u221e)", + "UserID (-\u221e, +\u221e)", + "WatchID (-\u221e, +\u221e)" + ], + "SsaProgram": { + "Command": [ + { + "GroupBy": { + "Aggregates": [ + { + "Column": { + "Id": 106 + }, + "Function": { + "Arguments": [ + { + "Id": 10 + } + ], + "Id": 5 + } + }, + { + "Column": { + "Id": 107 + }, + "Function": { + "Arguments": [ + { + "Id": 10 + } + ], + "Id": 2 + } + } + ] + } + }, + { + "Projection": { + "Columns": [ + { + "Id": 107 + }, + { + "Id": 106 + } + ] + } + } + ], + "Version": 4 + }, + "Table": "clickbench/plans/column/hits" + } + ], + "PlanNodeId": 7 + } + ] + } + ] + } + ] + } + ] + }, "tables": [ { "name": "/local/clickbench/plans/column/hits", diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-30 b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-30 index c04e7dd85fcd..137a0447c63c 100644 --- 
a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-30 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-30 @@ -116,7 +116,8 @@ } ], "FunctionType": 2, - "KernelIdx": 0 + "KernelIdx": 0, + "YqlOperationId": 12 } } }, @@ -171,6 +172,142 @@ } ] }, + "SimplifiedPlan": { + "Node Type": "Query", + "PlanNodeId": 0, + "PlanNodeType": "Query", + "Plans": [ + { + "Node Type": "ResultSet", + "PlanNodeId": 1, + "PlanNodeType": "ResultSet", + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "10", + "Name": "Limit" + } + ], + "PlanNodeId": 2, + "Plans": [ + { + "Node Type": "TopSort", + "Operators": [ + { + "Limit": "10", + "Name": "TopSort", + "TopSortBy": "argument.Count0" + } + ], + "PlanNodeId": 4, + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Aggregation": "{_yql_agg_1: SUM(state._yql_agg_1,1),_yql_agg_2: SUM(item.IsRefresh,state._yql_agg_2)}", + "GroupBy": "", + "Name": "Aggregate" + } + ], + "PlanNodeId": 6, + "Plans": [ + { + "Node Type": "TableFullScan", + "Operators": [ + { + "Name": "TableFullScan", + "ReadColumns": [ + "ClientIP", + "IsRefresh", + "ResolutionWidth", + "SearchEngineID", + "SearchPhrase" + ], + "ReadRanges": [ + "EventTime (-\u221e, +\u221e)", + "CounterID (-\u221e, +\u221e)", + "EventDate (-\u221e, +\u221e)", + "UserID (-\u221e, +\u221e)", + "WatchID (-\u221e, +\u221e)" + ], + "SsaProgram": { + "Command": [ + { + "Assign": { + "Column": { + "Id": 106 + }, + "Constant": { + "Bytes": "" + } + } + }, + { + "Assign": { + "Column": { + "Id": 107 + }, + "Function": { + "Arguments": [ + { + "Id": 40 + }, + { + "Id": 106 + } + ], + "FunctionType": 2, + "KernelIdx": 0, + "YqlOperationId": 12 + } + } + }, + { + "Filter": { + "Predicate": { + "Id": 107 + } + } + }, + { + "Projection": { + "Columns": [ + { + "Id": 8 + }, + { + "Id": 16 + }, + { + "Id": 21 + }, + { + "Id": 39 + } + ] + } + } + ], + "Version": 4 + }, + 
"Table": "clickbench/plans/column/hits" + } + ], + "PlanNodeId": 7 + } + ] + } + ] + } + ] + } + ] + } + ] + }, "tables": [ { "name": "/local/clickbench/plans/column/hits", diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-31 b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-31 index 51a3960052be..b4f172042500 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-31 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-31 @@ -116,7 +116,8 @@ } ], "FunctionType": 2, - "KernelIdx": 0 + "KernelIdx": 0, + "YqlOperationId": 12 } } }, @@ -171,6 +172,142 @@ } ] }, + "SimplifiedPlan": { + "Node Type": "Query", + "PlanNodeId": 0, + "PlanNodeType": "Query", + "Plans": [ + { + "Node Type": "ResultSet", + "PlanNodeId": 1, + "PlanNodeType": "ResultSet", + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "10", + "Name": "Limit" + } + ], + "PlanNodeId": 2, + "Plans": [ + { + "Node Type": "TopSort", + "Operators": [ + { + "Limit": "10", + "Name": "TopSort", + "TopSortBy": "argument.Count0" + } + ], + "PlanNodeId": 4, + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Aggregation": "{_yql_agg_1: SUM(state._yql_agg_1,1),_yql_agg_2: SUM(item.IsRefresh,state._yql_agg_2)}", + "GroupBy": "", + "Name": "Aggregate" + } + ], + "PlanNodeId": 6, + "Plans": [ + { + "Node Type": "TableFullScan", + "Operators": [ + { + "Name": "TableFullScan", + "ReadColumns": [ + "ClientIP", + "IsRefresh", + "ResolutionWidth", + "SearchPhrase", + "WatchID" + ], + "ReadRanges": [ + "EventTime (-\u221e, +\u221e)", + "CounterID (-\u221e, +\u221e)", + "EventDate (-\u221e, +\u221e)", + "UserID (-\u221e, +\u221e)", + "WatchID (-\u221e, +\u221e)" + ], + "SsaProgram": { + "Command": [ + { + "Assign": { + "Column": { + "Id": 106 + }, + "Constant": { + "Bytes": "" + } + } + }, + 
{ + "Assign": { + "Column": { + "Id": 107 + }, + "Function": { + "Arguments": [ + { + "Id": 40 + }, + { + "Id": 106 + } + ], + "FunctionType": 2, + "KernelIdx": 0, + "YqlOperationId": 12 + } + } + }, + { + "Filter": { + "Predicate": { + "Id": 107 + } + } + }, + { + "Projection": { + "Columns": [ + { + "Id": 8 + }, + { + "Id": 16 + }, + { + "Id": 21 + }, + { + "Id": 1 + } + ] + } + } + ], + "Version": 4 + }, + "Table": "clickbench/plans/column/hits" + } + ], + "PlanNodeId": 7 + } + ] + } + ] + } + ] + } + ] + } + ] + }, "tables": [ { "name": "/local/clickbench/plans/column/hits", diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-32 b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-32 index d800c5df143e..a84b3b94e691 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-32 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-32 @@ -134,6 +134,104 @@ } ] }, + "SimplifiedPlan": { + "Node Type": "Query", + "PlanNodeId": 0, + "PlanNodeType": "Query", + "Plans": [ + { + "Node Type": "ResultSet", + "PlanNodeId": 1, + "PlanNodeType": "ResultSet", + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "10", + "Name": "Limit" + } + ], + "PlanNodeId": 2, + "Plans": [ + { + "Node Type": "TopSort", + "Operators": [ + { + "Limit": "10", + "Name": "TopSort", + "TopSortBy": "argument.Count0" + } + ], + "PlanNodeId": 4, + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Aggregation": "{_yql_agg_1: SUM(state._yql_agg_1,1),_yql_agg_2: SUM(item.IsRefresh,state._yql_agg_2)}", + "GroupBy": "", + "Name": "Aggregate" + } + ], + "PlanNodeId": 6, + "Plans": [ + { + "Node Type": "TableFullScan", + "Operators": [ + { + "Name": "TableFullScan", + "ReadColumns": [ + "ClientIP", + "IsRefresh", + "ResolutionWidth", + "WatchID" + ], + "ReadRanges": [ + 
"EventTime (-\u221e, +\u221e)", + "CounterID (-\u221e, +\u221e)", + "EventDate (-\u221e, +\u221e)", + "UserID (-\u221e, +\u221e)", + "WatchID (-\u221e, +\u221e)" + ], + "SsaProgram": { + "Command": [ + { + "Projection": { + "Columns": [ + { + "Id": 8 + }, + { + "Id": 16 + }, + { + "Id": 21 + }, + { + "Id": 1 + } + ] + } + } + ], + "Version": 4 + }, + "Table": "clickbench/plans/column/hits" + } + ], + "PlanNodeId": 7 + } + ] + } + ] + } + ] + } + ] + } + ] + }, "tables": [ { "name": "/local/clickbench/plans/column/hits", diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-33 b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-33 index 4a0b7e89454c..2e6d25e07471 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-33 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-33 @@ -121,6 +121,92 @@ } ] }, + "SimplifiedPlan": { + "Node Type": "Query", + "PlanNodeId": 0, + "PlanNodeType": "Query", + "Plans": [ + { + "Node Type": "ResultSet", + "PlanNodeId": 1, + "PlanNodeType": "ResultSet", + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "10", + "Name": "Limit" + } + ], + "PlanNodeId": 2, + "Plans": [ + { + "Node Type": "TopSort", + "Operators": [ + { + "Limit": "10", + "Name": "TopSort", + "TopSortBy": "argument.Count0" + } + ], + "PlanNodeId": 4, + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Aggregation": "{_yql_agg_0: SUM(state._yql_agg_0,1)}", + "GroupBy": "item.URL", + "Name": "Aggregate" + } + ], + "PlanNodeId": 6, + "Plans": [ + { + "Node Type": "TableFullScan", + "Operators": [ + { + "Name": "TableFullScan", + "ReadColumns": [ + "URL" + ], + "ReadRanges": [ + "EventTime (-\u221e, +\u221e)", + "CounterID (-\u221e, +\u221e)", + "EventDate (-\u221e, +\u221e)", + "UserID (-\u221e, +\u221e)", + "WatchID (-\u221e, +\u221e)" + 
], + "SsaProgram": { + "Command": [ + { + "Projection": { + "Columns": [ + { + "Id": 14 + } + ] + } + } + ], + "Version": 4 + }, + "Table": "clickbench/plans/column/hits" + } + ], + "PlanNodeId": 7 + } + ] + } + ] + } + ] + } + ] + } + ] + }, "tables": [ { "name": "/local/clickbench/plans/column/hits", diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-34 b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-34 index 67e2bafe2d12..9c0d1aa99ef1 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-34 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-34 @@ -126,6 +126,96 @@ } ] }, + "SimplifiedPlan": { + "Node Type": "Query", + "PlanNodeId": 0, + "PlanNodeType": "Query", + "Plans": [ + { + "Node Type": "ResultSet", + "PlanNodeId": 1, + "PlanNodeType": "ResultSet", + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "10", + "Name": "Limit" + } + ], + "PlanNodeId": 2, + "Plans": [ + { + "Node Type": "TopSort", + "Operators": [ + { + "Limit": "10", + "Name": "TopSort", + "TopSortBy": "argument.Count0" + } + ], + "PlanNodeId": 4, + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Aggregation": "{_yql_agg_0: SUM(state._yql_agg_0,1)}", + "GroupBy": "", + "Name": "Aggregate" + } + ], + "PlanNodeId": 6, + "Plans": [ + { + "Node Type": "TableFullScan", + "Operators": [ + { + "Name": "TableFullScan", + "ReadColumns": [ + "URL", + "UserID" + ], + "ReadRanges": [ + "EventTime (-\u221e, +\u221e)", + "CounterID (-\u221e, +\u221e)", + "EventDate (-\u221e, +\u221e)", + "UserID (-\u221e, +\u221e)", + "WatchID (-\u221e, +\u221e)" + ], + "SsaProgram": { + "Command": [ + { + "Projection": { + "Columns": [ + { + "Id": 14 + }, + { + "Id": 10 + } + ] + } + } + ], + "Version": 4 + }, + "Table": "clickbench/plans/column/hits" + } + ], + "PlanNodeId": 7 
+ } + ] + } + ] + } + ] + } + ] + } + ] + }, "tables": [ { "name": "/local/clickbench/plans/column/hits", diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-35 b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-35 index 14f243ff0540..53cfbe218521 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-35 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-35 @@ -124,6 +124,92 @@ } ] }, + "SimplifiedPlan": { + "Node Type": "Query", + "PlanNodeId": 0, + "PlanNodeType": "Query", + "Plans": [ + { + "Node Type": "ResultSet", + "PlanNodeId": 1, + "PlanNodeType": "ResultSet", + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "10", + "Name": "Limit" + } + ], + "PlanNodeId": 2, + "Plans": [ + { + "Node Type": "TopSort", + "Operators": [ + { + "Limit": "10", + "Name": "TopSort", + "TopSortBy": "argument.Count0" + } + ], + "PlanNodeId": 4, + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Aggregation": "{_yql_agg_0: SUM(state._yql_agg_0,1)}", + "GroupBy": "", + "Name": "Aggregate" + } + ], + "PlanNodeId": 6, + "Plans": [ + { + "Node Type": "TableFullScan", + "Operators": [ + { + "Name": "TableFullScan", + "ReadColumns": [ + "ClientIP" + ], + "ReadRanges": [ + "EventTime (-\u221e, +\u221e)", + "CounterID (-\u221e, +\u221e)", + "EventDate (-\u221e, +\u221e)", + "UserID (-\u221e, +\u221e)", + "WatchID (-\u221e, +\u221e)" + ], + "SsaProgram": { + "Command": [ + { + "Projection": { + "Columns": [ + { + "Id": 8 + } + ] + } + } + ], + "Version": 4 + }, + "Table": "clickbench/plans/column/hits" + } + ], + "PlanNodeId": 7 + } + ] + } + ] + } + ] + } + ] + } + ] + }, "tables": [ { "name": "/local/clickbench/plans/column/hits", diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-36 
b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-36 index 1f496780cb58..1938d9f330ef 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-36 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-36 @@ -124,7 +124,8 @@ } ], "FunctionType": 2, - "KernelIdx": 0 + "KernelIdx": 0, + "YqlOperationId": 11 } } }, @@ -153,7 +154,8 @@ } ], "FunctionType": 2, - "KernelIdx": 1 + "KernelIdx": 1, + "YqlOperationId": 11 } } }, @@ -172,7 +174,8 @@ } ], "FunctionType": 2, - "KernelIdx": 2 + "KernelIdx": 2, + "YqlOperationId": 0 } } }, @@ -201,7 +204,8 @@ } ], "FunctionType": 2, - "KernelIdx": 3 + "KernelIdx": 3, + "YqlOperationId": 11 } } }, @@ -230,7 +234,8 @@ } ], "FunctionType": 2, - "KernelIdx": 4 + "KernelIdx": 4, + "YqlOperationId": 12 } } }, @@ -249,7 +254,8 @@ } ], "FunctionType": 2, - "KernelIdx": 5 + "KernelIdx": 5, + "YqlOperationId": 0 } } }, @@ -268,7 +274,8 @@ } ], "FunctionType": 2, - "KernelIdx": 6 + "KernelIdx": 6, + "YqlOperationId": 0 } } }, @@ -317,6 +324,298 @@ } ] }, + "SimplifiedPlan": { + "Node Type": "Query", + "PlanNodeId": 0, + "PlanNodeType": "Query", + "Plans": [ + { + "Node Type": "ResultSet", + "PlanNodeId": 1, + "PlanNodeType": "ResultSet", + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "10", + "Name": "Limit" + } + ], + "PlanNodeId": 2, + "Plans": [ + { + "Node Type": "TopSort", + "Operators": [ + { + "Limit": "10", + "Name": "TopSort", + "TopSortBy": "argument.Count0" + } + ], + "PlanNodeId": 4, + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Aggregation": "{_yql_agg_0: SUM(state._yql_agg_0,1)}", + "GroupBy": "item.URL", + "Name": "Aggregate" + } + ], + "PlanNodeId": 6, + "Plans": [ + { + "Node Type": "Filter", + "Operators": [ + { + "Name": "Filter", + "Predicate": "item.EventDate >= \"15887\" And item.EventDate <= \"15917\"" + } + ], + "PlanNodeId": 7, 
+ "Plans": [ + { + "Node Type": "TableFullScan", + "Operators": [ + { + "Name": "TableFullScan", + "ReadColumns": [ + "CounterID", + "DontCountHits", + "EventDate", + "IsRefresh", + "URL" + ], + "ReadRanges": [ + "EventTime (-\u221e, +\u221e)", + "CounterID (-\u221e, +\u221e)", + "EventDate (-\u221e, +\u221e)", + "UserID (-\u221e, +\u221e)", + "WatchID (-\u221e, +\u221e)" + ], + "SsaProgram": { + "Command": [ + { + "Assign": { + "Column": { + "Id": 106 + }, + "Constant": { + "Int32": 62 + } + } + }, + { + "Assign": { + "Column": { + "Id": 107 + }, + "Function": { + "Arguments": [ + { + "Id": 7 + }, + { + "Id": 106 + } + ], + "FunctionType": 2, + "KernelIdx": 0, + "YqlOperationId": 11 + } + } + }, + { + "Assign": { + "Column": { + "Id": 108 + }, + "Constant": { + "Int32": 0 + } + } + }, + { + "Assign": { + "Column": { + "Id": 109 + }, + "Function": { + "Arguments": [ + { + "Id": 62 + }, + { + "Id": 108 + } + ], + "FunctionType": 2, + "KernelIdx": 1, + "YqlOperationId": 11 + } + } + }, + { + "Assign": { + "Column": { + "Id": 110 + }, + "Function": { + "Arguments": [ + { + "Id": 107 + }, + { + "Id": 109 + } + ], + "FunctionType": 2, + "KernelIdx": 2, + "YqlOperationId": 0 + } + } + }, + { + "Assign": { + "Column": { + "Id": 111 + }, + "Constant": { + "Int32": 0 + } + } + }, + { + "Assign": { + "Column": { + "Id": 112 + }, + "Function": { + "Arguments": [ + { + "Id": 16 + }, + { + "Id": 111 + } + ], + "FunctionType": 2, + "KernelIdx": 3, + "YqlOperationId": 11 + } + } + }, + { + "Assign": { + "Column": { + "Id": 113 + }, + "Constant": { + "Bytes": "" + } + } + }, + { + "Assign": { + "Column": { + "Id": 114 + }, + "Function": { + "Arguments": [ + { + "Id": 14 + }, + { + "Id": 113 + } + ], + "FunctionType": 2, + "KernelIdx": 4, + "YqlOperationId": 12 + } + } + }, + { + "Assign": { + "Column": { + "Id": 115 + }, + "Function": { + "Arguments": [ + { + "Id": 112 + }, + { + "Id": 114 + } + ], + "FunctionType": 2, + "KernelIdx": 5, + "YqlOperationId": 0 + } + } + }, + { + 
"Assign": { + "Column": { + "Id": 116 + }, + "Function": { + "Arguments": [ + { + "Id": 110 + }, + { + "Id": 115 + } + ], + "FunctionType": 2, + "KernelIdx": 6, + "YqlOperationId": 0 + } + } + }, + { + "Filter": { + "Predicate": { + "Id": 116 + } + } + }, + { + "Projection": { + "Columns": [ + { + "Id": 6 + }, + { + "Id": 14 + } + ] + } + } + ], + "Version": 4 + }, + "Table": "clickbench/plans/column/hits" + } + ], + "PlanNodeId": 8 + } + ] + } + ] + } + ] + } + ] + } + ] + } + ] + }, "tables": [ { "name": "/local/clickbench/plans/column/hits", diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-37 b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-37 index 30852faab421..0e460302098c 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-37 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-37 @@ -124,7 +124,8 @@ } ], "FunctionType": 2, - "KernelIdx": 0 + "KernelIdx": 0, + "YqlOperationId": 11 } } }, @@ -153,7 +154,8 @@ } ], "FunctionType": 2, - "KernelIdx": 1 + "KernelIdx": 1, + "YqlOperationId": 11 } } }, @@ -172,7 +174,8 @@ } ], "FunctionType": 2, - "KernelIdx": 2 + "KernelIdx": 2, + "YqlOperationId": 0 } } }, @@ -201,7 +204,8 @@ } ], "FunctionType": 2, - "KernelIdx": 3 + "KernelIdx": 3, + "YqlOperationId": 11 } } }, @@ -230,7 +234,8 @@ } ], "FunctionType": 2, - "KernelIdx": 4 + "KernelIdx": 4, + "YqlOperationId": 12 } } }, @@ -249,7 +254,8 @@ } ], "FunctionType": 2, - "KernelIdx": 5 + "KernelIdx": 5, + "YqlOperationId": 0 } } }, @@ -268,7 +274,8 @@ } ], "FunctionType": 2, - "KernelIdx": 6 + "KernelIdx": 6, + "YqlOperationId": 0 } } }, @@ -317,6 +324,298 @@ } ] }, + "SimplifiedPlan": { + "Node Type": "Query", + "PlanNodeId": 0, + "PlanNodeType": "Query", + "Plans": [ + { + "Node Type": "ResultSet", + "PlanNodeId": 1, + "PlanNodeType": "ResultSet", + "Plans": [ 
+ { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "10", + "Name": "Limit" + } + ], + "PlanNodeId": 2, + "Plans": [ + { + "Node Type": "TopSort", + "Operators": [ + { + "Limit": "10", + "Name": "TopSort", + "TopSortBy": "argument.Count0" + } + ], + "PlanNodeId": 4, + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Aggregation": "{_yql_agg_0: SUM(state._yql_agg_0,1)}", + "GroupBy": "item.Title", + "Name": "Aggregate" + } + ], + "PlanNodeId": 6, + "Plans": [ + { + "Node Type": "Filter", + "Operators": [ + { + "Name": "Filter", + "Predicate": "item.EventDate >= \"15887\" And item.EventDate <= \"15917\"" + } + ], + "PlanNodeId": 7, + "Plans": [ + { + "Node Type": "TableFullScan", + "Operators": [ + { + "Name": "TableFullScan", + "ReadColumns": [ + "CounterID", + "DontCountHits", + "EventDate", + "IsRefresh", + "Title" + ], + "ReadRanges": [ + "EventTime (-\u221e, +\u221e)", + "CounterID (-\u221e, +\u221e)", + "EventDate (-\u221e, +\u221e)", + "UserID (-\u221e, +\u221e)", + "WatchID (-\u221e, +\u221e)" + ], + "SsaProgram": { + "Command": [ + { + "Assign": { + "Column": { + "Id": 106 + }, + "Constant": { + "Int32": 62 + } + } + }, + { + "Assign": { + "Column": { + "Id": 107 + }, + "Function": { + "Arguments": [ + { + "Id": 7 + }, + { + "Id": 106 + } + ], + "FunctionType": 2, + "KernelIdx": 0, + "YqlOperationId": 11 + } + } + }, + { + "Assign": { + "Column": { + "Id": 108 + }, + "Constant": { + "Int32": 0 + } + } + }, + { + "Assign": { + "Column": { + "Id": 109 + }, + "Function": { + "Arguments": [ + { + "Id": 62 + }, + { + "Id": 108 + } + ], + "FunctionType": 2, + "KernelIdx": 1, + "YqlOperationId": 11 + } + } + }, + { + "Assign": { + "Column": { + "Id": 110 + }, + "Function": { + "Arguments": [ + { + "Id": 107 + }, + { + "Id": 109 + } + ], + "FunctionType": 2, + "KernelIdx": 2, + "YqlOperationId": 0 + } + } + }, + { + "Assign": { + "Column": { + "Id": 111 + }, + "Constant": { + "Int32": 0 + } + } + }, + { + "Assign": { + "Column": { + "Id": 
112 + }, + "Function": { + "Arguments": [ + { + "Id": 16 + }, + { + "Id": 111 + } + ], + "FunctionType": 2, + "KernelIdx": 3, + "YqlOperationId": 11 + } + } + }, + { + "Assign": { + "Column": { + "Id": 113 + }, + "Constant": { + "Bytes": "" + } + } + }, + { + "Assign": { + "Column": { + "Id": 114 + }, + "Function": { + "Arguments": [ + { + "Id": 3 + }, + { + "Id": 113 + } + ], + "FunctionType": 2, + "KernelIdx": 4, + "YqlOperationId": 12 + } + } + }, + { + "Assign": { + "Column": { + "Id": 115 + }, + "Function": { + "Arguments": [ + { + "Id": 112 + }, + { + "Id": 114 + } + ], + "FunctionType": 2, + "KernelIdx": 5, + "YqlOperationId": 0 + } + } + }, + { + "Assign": { + "Column": { + "Id": 116 + }, + "Function": { + "Arguments": [ + { + "Id": 110 + }, + { + "Id": 115 + } + ], + "FunctionType": 2, + "KernelIdx": 6, + "YqlOperationId": 0 + } + } + }, + { + "Filter": { + "Predicate": { + "Id": 116 + } + } + }, + { + "Projection": { + "Columns": [ + { + "Id": 6 + }, + { + "Id": 3 + } + ] + } + } + ], + "Version": 4 + }, + "Table": "clickbench/plans/column/hits" + } + ], + "PlanNodeId": 8 + } + ] + } + ] + } + ] + } + ] + } + ] + } + ] + }, "tables": [ { "name": "/local/clickbench/plans/column/hits", diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-38 b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-38 index b5ecfd9969e1..80e4d3c1b68a 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-38 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-38 @@ -154,7 +154,8 @@ } ], "FunctionType": 2, - "KernelIdx": 0 + "KernelIdx": 0, + "YqlOperationId": 11 } } }, @@ -183,7 +184,8 @@ } ], "FunctionType": 2, - "KernelIdx": 1 + "KernelIdx": 1, + "YqlOperationId": 11 } } }, @@ -202,7 +204,8 @@ } ], "FunctionType": 2, - "KernelIdx": 2 + "KernelIdx": 2, + "YqlOperationId": 0 } } }, 
@@ -231,7 +234,8 @@ } ], "FunctionType": 2, - "KernelIdx": 3 + "KernelIdx": 3, + "YqlOperationId": 12 } } }, @@ -260,7 +264,8 @@ } ], "FunctionType": 2, - "KernelIdx": 4 + "KernelIdx": 4, + "YqlOperationId": 11 } } }, @@ -279,7 +284,8 @@ } ], "FunctionType": 2, - "KernelIdx": 5 + "KernelIdx": 5, + "YqlOperationId": 0 } } }, @@ -298,7 +304,8 @@ } ], "FunctionType": 2, - "KernelIdx": 6 + "KernelIdx": 6, + "YqlOperationId": 0 } } }, @@ -351,6 +358,323 @@ } ] }, + "SimplifiedPlan": { + "Node Type": "Query", + "PlanNodeId": 0, + "PlanNodeType": "Query", + "Plans": [ + { + "Node Type": "ResultSet", + "PlanNodeId": 1, + "PlanNodeType": "ResultSet", + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "10", + "Name": "Limit" + } + ], + "PlanNodeId": 2, + "Plans": [ + { + "Node Type": "Offset", + "Operators": [ + { + "Name": "Offset", + "Offset": "1000" + } + ], + "PlanNodeId": 3, + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "SUM(10,1000)", + "Name": "Limit" + } + ], + "PlanNodeId": 5, + "Plans": [ + { + "Node Type": "TopSort", + "Operators": [ + { + "Limit": "SUM(10,1000)", + "Name": "TopSort", + "TopSortBy": "argument.Count0" + } + ], + "PlanNodeId": 7, + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Aggregation": "{_yql_agg_0: SUM(state._yql_agg_0,1)}", + "GroupBy": "item.URL", + "Name": "Aggregate" + } + ], + "PlanNodeId": 9, + "Plans": [ + { + "Node Type": "Filter", + "Operators": [ + { + "Name": "Filter", + "Predicate": "item.EventDate >= \"15887\" And item.EventDate <= \"15917\"" + } + ], + "PlanNodeId": 10, + "Plans": [ + { + "Node Type": "TableFullScan", + "Operators": [ + { + "Name": "TableFullScan", + "ReadColumns": [ + "CounterID", + "EventDate", + "IsDownload", + "IsLink", + "IsRefresh", + "URL" + ], + "ReadRanges": [ + "EventTime (-\u221e, +\u221e)", + "CounterID (-\u221e, +\u221e)", + "EventDate (-\u221e, +\u221e)", + "UserID (-\u221e, +\u221e)", + "WatchID (-\u221e, +\u221e)" + ], + 
"SsaProgram": { + "Command": [ + { + "Assign": { + "Column": { + "Id": 106 + }, + "Constant": { + "Int32": 62 + } + } + }, + { + "Assign": { + "Column": { + "Id": 107 + }, + "Function": { + "Arguments": [ + { + "Id": 7 + }, + { + "Id": 106 + } + ], + "FunctionType": 2, + "KernelIdx": 0, + "YqlOperationId": 11 + } + } + }, + { + "Assign": { + "Column": { + "Id": 108 + }, + "Constant": { + "Int32": 0 + } + } + }, + { + "Assign": { + "Column": { + "Id": 109 + }, + "Function": { + "Arguments": [ + { + "Id": 16 + }, + { + "Id": 108 + } + ], + "FunctionType": 2, + "KernelIdx": 1, + "YqlOperationId": 11 + } + } + }, + { + "Assign": { + "Column": { + "Id": 110 + }, + "Function": { + "Arguments": [ + { + "Id": 107 + }, + { + "Id": 109 + } + ], + "FunctionType": 2, + "KernelIdx": 2, + "YqlOperationId": 0 + } + } + }, + { + "Assign": { + "Column": { + "Id": 111 + }, + "Constant": { + "Int32": 0 + } + } + }, + { + "Assign": { + "Column": { + "Id": 112 + }, + "Function": { + "Arguments": [ + { + "Id": 53 + }, + { + "Id": 111 + } + ], + "FunctionType": 2, + "KernelIdx": 3, + "YqlOperationId": 12 + } + } + }, + { + "Assign": { + "Column": { + "Id": 113 + }, + "Constant": { + "Int32": 0 + } + } + }, + { + "Assign": { + "Column": { + "Id": 114 + }, + "Function": { + "Arguments": [ + { + "Id": 54 + }, + { + "Id": 113 + } + ], + "FunctionType": 2, + "KernelIdx": 4, + "YqlOperationId": 11 + } + } + }, + { + "Assign": { + "Column": { + "Id": 115 + }, + "Function": { + "Arguments": [ + { + "Id": 112 + }, + { + "Id": 114 + } + ], + "FunctionType": 2, + "KernelIdx": 5, + "YqlOperationId": 0 + } + } + }, + { + "Assign": { + "Column": { + "Id": 116 + }, + "Function": { + "Arguments": [ + { + "Id": 110 + }, + { + "Id": 115 + } + ], + "FunctionType": 2, + "KernelIdx": 6, + "YqlOperationId": 0 + } + } + }, + { + "Filter": { + "Predicate": { + "Id": 116 + } + } + }, + { + "Projection": { + "Columns": [ + { + "Id": 6 + }, + { + "Id": 14 + } + ] + } + } + ], + "Version": 4 + }, + "Table": 
"clickbench/plans/column/hits" + } + ], + "PlanNodeId": 11 + } + ] + } + ] + } + ] + } + ] + } + ] + } + ] + } + ] + } + ] + }, "tables": [ { "name": "/local/clickbench/plans/column/hits", diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-39 b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-39 index 31af3b96ba14..355223bb77ee 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-39 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-39 @@ -160,7 +160,8 @@ } ], "FunctionType": 2, - "KernelIdx": 0 + "KernelIdx": 0, + "YqlOperationId": 11 } } }, @@ -189,7 +190,8 @@ } ], "FunctionType": 2, - "KernelIdx": 1 + "KernelIdx": 1, + "YqlOperationId": 11 } } }, @@ -208,7 +210,8 @@ } ], "FunctionType": 2, - "KernelIdx": 2 + "KernelIdx": 2, + "YqlOperationId": 0 } } }, @@ -273,6 +276,237 @@ } ] }, + "SimplifiedPlan": { + "Node Type": "Query", + "PlanNodeId": 0, + "PlanNodeType": "Query", + "Plans": [ + { + "Node Type": "ResultSet", + "PlanNodeId": 1, + "PlanNodeType": "ResultSet", + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "10", + "Name": "Limit" + } + ], + "PlanNodeId": 2, + "Plans": [ + { + "Node Type": "Offset", + "Operators": [ + { + "Name": "Offset", + "Offset": "1000" + } + ], + "PlanNodeId": 3, + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "SUM(10,1000)", + "Name": "Limit" + } + ], + "PlanNodeId": 5, + "Plans": [ + { + "Node Type": "TopSort", + "Operators": [ + { + "Limit": "SUM(10,1000)", + "Name": "TopSort", + "TopSortBy": "argument.Count0" + } + ], + "PlanNodeId": 7, + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Aggregation": "{_yql_agg_0: SUM(state._yql_agg_0,1)}", + "GroupBy": "", + "Name": "Aggregate" + } + ], + "PlanNodeId": 9, + "Plans": [ + { + "Node Type": "Filter", + 
"Operators": [ + { + "Name": "Filter", + "Predicate": "item.EventDate >= \"15887\" And item.EventDate <= \"15917\"" + } + ], + "PlanNodeId": 10, + "Plans": [ + { + "Node Type": "TableFullScan", + "Operators": [ + { + "Name": "TableFullScan", + "ReadColumns": [ + "AdvEngineID", + "CounterID", + "EventDate", + "IsRefresh", + "Referer", + "SearchEngineID", + "TraficSourceID", + "URL" + ], + "ReadRanges": [ + "EventTime (-\u221e, +\u221e)", + "CounterID (-\u221e, +\u221e)", + "EventDate (-\u221e, +\u221e)", + "UserID (-\u221e, +\u221e)", + "WatchID (-\u221e, +\u221e)" + ], + "SsaProgram": { + "Command": [ + { + "Assign": { + "Column": { + "Id": 106 + }, + "Constant": { + "Int32": 62 + } + } + }, + { + "Assign": { + "Column": { + "Id": 107 + }, + "Function": { + "Arguments": [ + { + "Id": 7 + }, + { + "Id": 106 + } + ], + "FunctionType": 2, + "KernelIdx": 0, + "YqlOperationId": 11 + } + } + }, + { + "Assign": { + "Column": { + "Id": 108 + }, + "Constant": { + "Int32": 0 + } + } + }, + { + "Assign": { + "Column": { + "Id": 109 + }, + "Function": { + "Arguments": [ + { + "Id": 16 + }, + { + "Id": 108 + } + ], + "FunctionType": 2, + "KernelIdx": 1, + "YqlOperationId": 11 + } + } + }, + { + "Assign": { + "Column": { + "Id": 110 + }, + "Function": { + "Arguments": [ + { + "Id": 107 + }, + { + "Id": 109 + } + ], + "FunctionType": 2, + "KernelIdx": 2, + "YqlOperationId": 0 + } + } + }, + { + "Filter": { + "Predicate": { + "Id": 110 + } + } + }, + { + "Projection": { + "Columns": [ + { + "Id": 41 + }, + { + "Id": 6 + }, + { + "Id": 15 + }, + { + "Id": 39 + }, + { + "Id": 38 + }, + { + "Id": 14 + } + ] + } + } + ], + "Version": 4 + }, + "Table": "clickbench/plans/column/hits" + } + ], + "PlanNodeId": 11 + } + ] + } + ] + } + ] + } + ] + } + ] + } + ] + } + ] + } + ] + }, "tables": [ { "name": "/local/clickbench/plans/column/hits", diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-4 
b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-4 index 3a08b8d56686..92af7df2c54b 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-4 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-4 @@ -145,6 +145,101 @@ } ] }, + "SimplifiedPlan": { + "Node Type": "Query", + "PlanNodeId": 0, + "PlanNodeType": "Query", + "Plans": [ + { + "Node Type": "ResultSet_1", + "PlanNodeId": 1, + "PlanNodeType": "ResultSet", + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Name": "Aggregate" + } + ], + "PlanNodeId": 4, + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "1", + "Name": "Limit" + } + ], + "PlanNodeId": 5, + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Name": "Aggregate" + } + ], + "PlanNodeId": 7, + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Aggregation": "state", + "GroupBy": "row.UserID", + "Name": "Aggregate" + } + ], + "PlanNodeId": 9, + "Plans": [ + { + "Node Type": "TableFullScan", + "Operators": [ + { + "Name": "TableFullScan", + "ReadColumns": [ + "UserID" + ], + "ReadRanges": [ + "EventTime (-\u221e, +\u221e)", + "CounterID (-\u221e, +\u221e)", + "EventDate (-\u221e, +\u221e)", + "UserID (-\u221e, +\u221e)", + "WatchID (-\u221e, +\u221e)" + ], + "SsaProgram": { + "Command": [ + { + "Projection": { + "Columns": [ + { + "Id": 10 + } + ] + } + } + ], + "Version": 4 + }, + "Table": "clickbench/plans/column/hits" + } + ], + "PlanNodeId": 10 + } + ] + } + ] + } + ] + } + ] + } + ] + } + ] + }, "tables": [ { "name": "/local/clickbench/plans/column/hits", diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-40 b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-40 index 9013e777b91c..2b32e0ce0da6 100644 --- 
a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-40 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-40 @@ -155,7 +155,8 @@ } ], "FunctionType": 2, - "KernelIdx": 0 + "KernelIdx": 0, + "YqlOperationId": 11 } } }, @@ -184,7 +185,8 @@ } ], "FunctionType": 2, - "KernelIdx": 1 + "KernelIdx": 1, + "YqlOperationId": 11 } } }, @@ -213,7 +215,8 @@ } ], "FunctionType": 2, - "KernelIdx": 2 + "KernelIdx": 2, + "YqlOperationId": 11 } } }, @@ -232,7 +235,8 @@ } ], "FunctionType": 2, - "KernelIdx": 3 + "KernelIdx": 3, + "YqlOperationId": 0 } } }, @@ -251,7 +255,8 @@ } ], "FunctionType": 2, - "KernelIdx": 4 + "KernelIdx": 4, + "YqlOperationId": 0 } } }, @@ -307,6 +312,276 @@ } ] }, + "SimplifiedPlan": { + "Node Type": "Query", + "PlanNodeId": 0, + "PlanNodeType": "Query", + "Plans": [ + { + "Node Type": "ResultSet", + "PlanNodeId": 1, + "PlanNodeType": "ResultSet", + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "10", + "Name": "Limit" + } + ], + "PlanNodeId": 2, + "Plans": [ + { + "Node Type": "Offset", + "Operators": [ + { + "Name": "Offset", + "Offset": "100" + } + ], + "PlanNodeId": 3, + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "SUM(10,100)", + "Name": "Limit" + } + ], + "PlanNodeId": 5, + "Plans": [ + { + "Node Type": "TopSort", + "Operators": [ + { + "Limit": "SUM(10,100)", + "Name": "TopSort", + "TopSortBy": "argument.Count0" + } + ], + "PlanNodeId": 7, + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Aggregation": "{_yql_agg_0: SUM(state._yql_agg_0,1)}", + "GroupBy": "", + "Name": "Aggregate" + } + ], + "PlanNodeId": 9, + "Plans": [ + { + "Node Type": "Filter", + "Operators": [ + { + "Name": "Filter", + "Predicate": "item.EventDate >= \"15887\" And item.EventDate <= \"15917\" And item.TraficSourceID == -1 Or item.TraficSourceID == 6" + } + ], + "PlanNodeId": 10, + "Plans": [ + { + "Node Type": 
"TableFullScan", + "Operators": [ + { + "Name": "TableFullScan", + "ReadColumns": [ + "CounterID", + "EventDate", + "IsRefresh", + "RefererHash", + "TraficSourceID", + "URLHash" + ], + "ReadRanges": [ + "EventTime (-\u221e, +\u221e)", + "CounterID (-\u221e, +\u221e)", + "EventDate (-\u221e, +\u221e)", + "UserID (-\u221e, +\u221e)", + "WatchID (-\u221e, +\u221e)" + ], + "SsaProgram": { + "Command": [ + { + "Assign": { + "Column": { + "Id": 106 + }, + "Constant": { + "Int32": 62 + } + } + }, + { + "Assign": { + "Column": { + "Id": 107 + }, + "Function": { + "Arguments": [ + { + "Id": 7 + }, + { + "Id": 106 + } + ], + "FunctionType": 2, + "KernelIdx": 0, + "YqlOperationId": 11 + } + } + }, + { + "Assign": { + "Column": { + "Id": 108 + }, + "Constant": { + "Int32": 0 + } + } + }, + { + "Assign": { + "Column": { + "Id": 109 + }, + "Function": { + "Arguments": [ + { + "Id": 16 + }, + { + "Id": 108 + } + ], + "FunctionType": 2, + "KernelIdx": 1, + "YqlOperationId": 11 + } + } + }, + { + "Assign": { + "Column": { + "Id": 110 + }, + "Constant": { + "Int64": 3594120000172545465 + } + } + }, + { + "Assign": { + "Column": { + "Id": 111 + }, + "Function": { + "Arguments": [ + { + "Id": 103 + }, + { + "Id": 110 + } + ], + "FunctionType": 2, + "KernelIdx": 2, + "YqlOperationId": 11 + } + } + }, + { + "Assign": { + "Column": { + "Id": 112 + }, + "Function": { + "Arguments": [ + { + "Id": 109 + }, + { + "Id": 111 + } + ], + "FunctionType": 2, + "KernelIdx": 3, + "YqlOperationId": 0 + } + } + }, + { + "Assign": { + "Column": { + "Id": 113 + }, + "Function": { + "Arguments": [ + { + "Id": 107 + }, + { + "Id": 112 + } + ], + "FunctionType": 2, + "KernelIdx": 4, + "YqlOperationId": 0 + } + } + }, + { + "Filter": { + "Predicate": { + "Id": 113 + } + } + }, + { + "Projection": { + "Columns": [ + { + "Id": 6 + }, + { + "Id": 38 + }, + { + "Id": 104 + } + ] + } + } + ], + "Version": 4 + }, + "Table": "clickbench/plans/column/hits" + } + ], + "PlanNodeId": 11 + } + ] + } + ] + } + ] + } + ] 
+ } + ] + } + ] + } + ] + } + ] + }, "tables": [ { "name": "/local/clickbench/plans/column/hits", diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-41 b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-41 index 949eaccca030..e87308e15e89 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-41 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-41 @@ -156,7 +156,8 @@ } ], "FunctionType": 2, - "KernelIdx": 0 + "KernelIdx": 0, + "YqlOperationId": 11 } } }, @@ -185,7 +186,8 @@ } ], "FunctionType": 2, - "KernelIdx": 1 + "KernelIdx": 1, + "YqlOperationId": 11 } } }, @@ -204,7 +206,8 @@ } ], "FunctionType": 2, - "KernelIdx": 2 + "KernelIdx": 2, + "YqlOperationId": 0 } } }, @@ -233,7 +236,8 @@ } ], "FunctionType": 2, - "KernelIdx": 3 + "KernelIdx": 3, + "YqlOperationId": 11 } } }, @@ -262,7 +266,8 @@ } ], "FunctionType": 2, - "KernelIdx": 4 + "KernelIdx": 4, + "YqlOperationId": 11 } } }, @@ -281,7 +286,8 @@ } ], "FunctionType": 2, - "KernelIdx": 5 + "KernelIdx": 5, + "YqlOperationId": 0 } } }, @@ -300,7 +306,8 @@ } ], "FunctionType": 2, - "KernelIdx": 6 + "KernelIdx": 6, + "YqlOperationId": 0 } } }, @@ -356,6 +363,327 @@ } ] }, + "SimplifiedPlan": { + "Node Type": "Query", + "PlanNodeId": 0, + "PlanNodeType": "Query", + "Plans": [ + { + "Node Type": "ResultSet", + "PlanNodeId": 1, + "PlanNodeType": "ResultSet", + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "10", + "Name": "Limit" + } + ], + "PlanNodeId": 2, + "Plans": [ + { + "Node Type": "Offset", + "Operators": [ + { + "Name": "Offset", + "Offset": "10000" + } + ], + "PlanNodeId": 3, + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "SUM(10,10000)", + "Name": "Limit" + } + ], + "PlanNodeId": 5, + "Plans": [ + { + "Node Type": "TopSort", + "Operators": [ + { + "Limit": 
"SUM(10,10000)", + "Name": "TopSort", + "TopSortBy": "argument.Count0" + } + ], + "PlanNodeId": 7, + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Aggregation": "{_yql_agg_0: SUM(state._yql_agg_0,1)}", + "GroupBy": "", + "Name": "Aggregate" + } + ], + "PlanNodeId": 9, + "Plans": [ + { + "Node Type": "Filter", + "Operators": [ + { + "Name": "Filter", + "Predicate": "item.EventDate >= \"15887\" And item.EventDate <= \"15917\"" + } + ], + "PlanNodeId": 10, + "Plans": [ + { + "Node Type": "TableFullScan", + "Operators": [ + { + "Name": "TableFullScan", + "ReadColumns": [ + "CounterID", + "DontCountHits", + "EventDate", + "IsRefresh", + "URLHash", + "WindowClientHeight", + "WindowClientWidth" + ], + "ReadRanges": [ + "EventTime (-\u221e, +\u221e)", + "CounterID (-\u221e, +\u221e)", + "EventDate (-\u221e, +\u221e)", + "UserID (-\u221e, +\u221e)", + "WatchID (-\u221e, +\u221e)" + ], + "SsaProgram": { + "Command": [ + { + "Assign": { + "Column": { + "Id": 106 + }, + "Constant": { + "Int32": 62 + } + } + }, + { + "Assign": { + "Column": { + "Id": 107 + }, + "Function": { + "Arguments": [ + { + "Id": 7 + }, + { + "Id": 106 + } + ], + "FunctionType": 2, + "KernelIdx": 0, + "YqlOperationId": 11 + } + } + }, + { + "Assign": { + "Column": { + "Id": 108 + }, + "Constant": { + "Int32": 0 + } + } + }, + { + "Assign": { + "Column": { + "Id": 109 + }, + "Function": { + "Arguments": [ + { + "Id": 16 + }, + { + "Id": 108 + } + ], + "FunctionType": 2, + "KernelIdx": 1, + "YqlOperationId": 11 + } + } + }, + { + "Assign": { + "Column": { + "Id": 110 + }, + "Function": { + "Arguments": [ + { + "Id": 107 + }, + { + "Id": 109 + } + ], + "FunctionType": 2, + "KernelIdx": 2, + "YqlOperationId": 0 + } + } + }, + { + "Assign": { + "Column": { + "Id": 111 + }, + "Constant": { + "Int32": 0 + } + } + }, + { + "Assign": { + "Column": { + "Id": 112 + }, + "Function": { + "Arguments": [ + { + "Id": 62 + }, + { + "Id": 111 + } + ], + "FunctionType": 2, + "KernelIdx": 3, + 
"YqlOperationId": 11 + } + } + }, + { + "Assign": { + "Column": { + "Id": 113 + }, + "Constant": { + "Int64": 2868770270353813622 + } + } + }, + { + "Assign": { + "Column": { + "Id": 114 + }, + "Function": { + "Arguments": [ + { + "Id": 104 + }, + { + "Id": 113 + } + ], + "FunctionType": 2, + "KernelIdx": 4, + "YqlOperationId": 11 + } + } + }, + { + "Assign": { + "Column": { + "Id": 115 + }, + "Function": { + "Arguments": [ + { + "Id": 112 + }, + { + "Id": 114 + } + ], + "FunctionType": 2, + "KernelIdx": 5, + "YqlOperationId": 0 + } + } + }, + { + "Assign": { + "Column": { + "Id": 116 + }, + "Function": { + "Arguments": [ + { + "Id": 110 + }, + { + "Id": 115 + } + ], + "FunctionType": 2, + "KernelIdx": 6, + "YqlOperationId": 0 + } + } + }, + { + "Filter": { + "Predicate": { + "Id": 116 + } + } + }, + { + "Projection": { + "Columns": [ + { + "Id": 6 + }, + { + "Id": 44 + }, + { + "Id": 43 + } + ] + } + } + ], + "Version": 4 + }, + "Table": "clickbench/plans/column/hits" + } + ], + "PlanNodeId": 11 + } + ] + } + ] + } + ] + } + ] + } + ] + } + ] + } + ] + } + ] + }, "tables": [ { "name": "/local/clickbench/plans/column/hits", diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-42 b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-42 index 03b459e31d19..31ff92a28677 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-42 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-42 @@ -153,7 +153,8 @@ } ], "FunctionType": 2, - "KernelIdx": 0 + "KernelIdx": 0, + "YqlOperationId": 11 } } }, @@ -182,7 +183,8 @@ } ], "FunctionType": 2, - "KernelIdx": 1 + "KernelIdx": 1, + "YqlOperationId": 11 } } }, @@ -211,7 +213,8 @@ } ], "FunctionType": 2, - "KernelIdx": 2 + "KernelIdx": 2, + "YqlOperationId": 11 } } }, @@ -230,7 +233,8 @@ } ], "FunctionType": 2, - "KernelIdx": 3 + 
"KernelIdx": 3, + "YqlOperationId": 0 } } }, @@ -249,7 +253,8 @@ } ], "FunctionType": 2, - "KernelIdx": 4 + "KernelIdx": 4, + "YqlOperationId": 0 } } }, @@ -302,6 +307,272 @@ } ] }, + "SimplifiedPlan": { + "Node Type": "Query", + "PlanNodeId": 0, + "PlanNodeType": "Query", + "Plans": [ + { + "Node Type": "ResultSet", + "PlanNodeId": 1, + "PlanNodeType": "ResultSet", + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "10", + "Name": "Limit" + } + ], + "PlanNodeId": 2, + "Plans": [ + { + "Node Type": "Offset", + "Operators": [ + { + "Name": "Offset", + "Offset": "1000" + } + ], + "PlanNodeId": 3, + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "SUM(10,1000)", + "Name": "Limit" + } + ], + "PlanNodeId": 5, + "Plans": [ + { + "Node Type": "TopSort", + "Operators": [ + { + "Limit": "SUM(10,1000)", + "Name": "TopSort", + "TopSortBy": "argument.Minute" + } + ], + "PlanNodeId": 7, + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Aggregation": "{_yql_agg_0: SUM(state._yql_agg_0,1)}", + "GroupBy": "item.Minute", + "Name": "Aggregate" + } + ], + "PlanNodeId": 9, + "Plans": [ + { + "Node Type": "Filter", + "Operators": [ + { + "Name": "Filter", + "Predicate": "item.EventDate >= \"15900\" And item.EventDate <= \"15901\"" + } + ], + "PlanNodeId": 10, + "Plans": [ + { + "Node Type": "TableFullScan", + "Operators": [ + { + "Name": "TableFullScan", + "ReadColumns": [ + "CounterID", + "DontCountHits", + "EventDate", + "EventTime", + "IsRefresh" + ], + "ReadRanges": [ + "EventTime (-\u221e, +\u221e)", + "CounterID (-\u221e, +\u221e)", + "EventDate (-\u221e, +\u221e)", + "UserID (-\u221e, +\u221e)", + "WatchID (-\u221e, +\u221e)" + ], + "SsaProgram": { + "Command": [ + { + "Assign": { + "Column": { + "Id": 106 + }, + "Constant": { + "Int32": 62 + } + } + }, + { + "Assign": { + "Column": { + "Id": 107 + }, + "Function": { + "Arguments": [ + { + "Id": 7 + }, + { + "Id": 106 + } + ], + "FunctionType": 2, + "KernelIdx": 0, 
+ "YqlOperationId": 11 + } + } + }, + { + "Assign": { + "Column": { + "Id": 108 + }, + "Constant": { + "Int32": 0 + } + } + }, + { + "Assign": { + "Column": { + "Id": 109 + }, + "Function": { + "Arguments": [ + { + "Id": 16 + }, + { + "Id": 108 + } + ], + "FunctionType": 2, + "KernelIdx": 1, + "YqlOperationId": 11 + } + } + }, + { + "Assign": { + "Column": { + "Id": 110 + }, + "Constant": { + "Int32": 0 + } + } + }, + { + "Assign": { + "Column": { + "Id": 111 + }, + "Function": { + "Arguments": [ + { + "Id": 62 + }, + { + "Id": 110 + } + ], + "FunctionType": 2, + "KernelIdx": 2, + "YqlOperationId": 11 + } + } + }, + { + "Assign": { + "Column": { + "Id": 112 + }, + "Function": { + "Arguments": [ + { + "Id": 109 + }, + { + "Id": 111 + } + ], + "FunctionType": 2, + "KernelIdx": 3, + "YqlOperationId": 0 + } + } + }, + { + "Assign": { + "Column": { + "Id": 113 + }, + "Function": { + "Arguments": [ + { + "Id": 107 + }, + { + "Id": 112 + } + ], + "FunctionType": 2, + "KernelIdx": 4, + "YqlOperationId": 0 + } + } + }, + { + "Filter": { + "Predicate": { + "Id": 113 + } + } + }, + { + "Projection": { + "Columns": [ + { + "Id": 6 + }, + { + "Id": 5 + } + ] + } + } + ], + "Version": 4 + }, + "Table": "clickbench/plans/column/hits" + } + ], + "PlanNodeId": 11 + } + ] + } + ] + } + ] + } + ] + } + ] + } + ] + } + ] + } + ] + }, "tables": [ { "name": "/local/clickbench/plans/column/hits", diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-5 b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-5 index 5e4d7e56cce5..a7496640f782 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-5 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-5 @@ -145,6 +145,101 @@ } ] }, + "SimplifiedPlan": { + "Node Type": "Query", + "PlanNodeId": 0, + "PlanNodeType": "Query", + "Plans": [ + { + "Node 
Type": "ResultSet_1", + "PlanNodeId": 1, + "PlanNodeType": "ResultSet", + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Name": "Aggregate" + } + ], + "PlanNodeId": 4, + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "1", + "Name": "Limit" + } + ], + "PlanNodeId": 5, + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Name": "Aggregate" + } + ], + "PlanNodeId": 7, + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Aggregation": "state", + "GroupBy": "row.SearchPhrase", + "Name": "Aggregate" + } + ], + "PlanNodeId": 9, + "Plans": [ + { + "Node Type": "TableFullScan", + "Operators": [ + { + "Name": "TableFullScan", + "ReadColumns": [ + "SearchPhrase" + ], + "ReadRanges": [ + "EventTime (-\u221e, +\u221e)", + "CounterID (-\u221e, +\u221e)", + "EventDate (-\u221e, +\u221e)", + "UserID (-\u221e, +\u221e)", + "WatchID (-\u221e, +\u221e)" + ], + "SsaProgram": { + "Command": [ + { + "Projection": { + "Columns": [ + { + "Id": 40 + } + ] + } + } + ], + "Version": 4 + }, + "Table": "clickbench/plans/column/hits" + } + ], + "PlanNodeId": 10 + } + ] + } + ] + } + ] + } + ] + } + ] + } + ] + }, "tables": [ { "name": "/local/clickbench/plans/column/hits", diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-6 b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-6 index 46bf5a6a1394..23c4d26e9af3 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-6 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-6 @@ -16,12 +16,21 @@ "Plans": [ { "CTE Name": "precompute_0_0", - "Node Type": "ConstantExpr", + "Node Type": "ConstantExpr-ConstantExpr", "Operators": [ { - "Inputs": [], + "Inputs": [ + { + "InternalOperatorId": 1 + } + ], "Iterator": "precompute_0_0", "Name": "Iterator" + }, + { + "Inputs": [], + 
"Member": "precompute_0_0", + "Name": "Member" } ], "PlanNodeId": 6 @@ -144,6 +153,204 @@ } ] }, + "SimplifiedPlan": { + "Node Type": "Query", + "PlanNodeId": 0, + "PlanNodeType": "Query", + "Plans": [ + { + "Node Type": "ResultSet_1", + "PlanNodeId": 1, + "PlanNodeType": "ResultSet", + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Name": "Aggregate" + } + ], + "PlanNodeId": 5, + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "1", + "Name": "Limit" + } + ], + "PlanNodeId": 6, + "Plans": [ + { + "Node Type": "TableFullScan", + "Operators": [ + { + "Name": "TableFullScan", + "ReadColumns": [ + "EventDate" + ], + "ReadRanges": [ + "EventTime (-\u221e, +\u221e)", + "CounterID (-\u221e, +\u221e)", + "EventDate (-\u221e, +\u221e)", + "UserID (-\u221e, +\u221e)", + "WatchID (-\u221e, +\u221e)" + ], + "SsaProgram": { + "Command": [ + { + "GroupBy": { + "Aggregates": [ + { + "Column": { + "Id": 106 + }, + "Function": { + "Arguments": [ + { + "Id": 6 + } + ], + "Id": 4 + } + }, + { + "Column": { + "Id": 107 + }, + "Function": { + "Arguments": [ + { + "Id": 6 + } + ], + "Id": 3 + } + } + ] + } + }, + { + "Projection": { + "Columns": [ + { + "Id": 106 + }, + { + "Id": 107 + } + ] + } + } + ], + "Version": 4 + }, + "Table": "clickbench/plans/column/hits" + } + ], + "PlanNodeId": 8 + } + ] + } + ] + }, + { + "Node Type": "Aggregate", + "Operators": [ + { + "Name": "Aggregate" + } + ], + "PlanNodeId": 10, + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "1", + "Name": "Limit" + } + ], + "PlanNodeId": 11, + "Plans": [ + { + "Node Type": "TableFullScan", + "Operators": [ + { + "Name": "TableFullScan", + "ReadColumns": [ + "EventDate" + ], + "ReadRanges": [ + "EventTime (-\u221e, +\u221e)", + "CounterID (-\u221e, +\u221e)", + "EventDate (-\u221e, +\u221e)", + "UserID (-\u221e, +\u221e)", + "WatchID (-\u221e, +\u221e)" + ], + "SsaProgram": { + "Command": [ + { + "GroupBy": { + "Aggregates": [ + { + "Column": { + 
"Id": 106 + }, + "Function": { + "Arguments": [ + { + "Id": 6 + } + ], + "Id": 4 + } + }, + { + "Column": { + "Id": 107 + }, + "Function": { + "Arguments": [ + { + "Id": 6 + } + ], + "Id": 3 + } + } + ] + } + }, + { + "Projection": { + "Columns": [ + { + "Id": 106 + }, + { + "Id": 107 + } + ] + } + } + ], + "Version": 4 + }, + "Table": "clickbench/plans/column/hits" + } + ], + "PlanNodeId": 13 + } + ] + } + ] + } + ] + } + ] + }, "tables": [ { "name": "/local/clickbench/plans/column/hits", diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-7 b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-7 index 1a47f956479f..580d8c9b0221 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-7 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-7 @@ -111,7 +111,8 @@ } ], "FunctionType": 2, - "KernelIdx": 0 + "KernelIdx": 0, + "YqlOperationId": 12 } } }, @@ -157,6 +158,129 @@ } ] }, + "SimplifiedPlan": { + "Node Type": "Query", + "PlanNodeId": 0, + "PlanNodeType": "Query", + "Plans": [ + { + "Node Type": "ResultSet", + "PlanNodeId": 1, + "PlanNodeType": "ResultSet", + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "1001", + "Name": "Limit" + } + ], + "PlanNodeId": 2, + "Plans": [ + { + "Node Type": "TopSort", + "Operators": [ + { + "Limit": "1001", + "Name": "TopSort", + "TopSortBy": "argument.Count0" + } + ], + "PlanNodeId": 4, + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Aggregation": "{_yql_agg_0: SUM(state._yql_agg_0,1)}", + "GroupBy": "item.AdvEngineID", + "Name": "Aggregate" + } + ], + "PlanNodeId": 6, + "Plans": [ + { + "Node Type": "TableFullScan", + "Operators": [ + { + "Name": "TableFullScan", + "ReadColumns": [ + "AdvEngineID" + ], + "ReadRanges": [ + "EventTime (-\u221e, +\u221e)", + "CounterID (-\u221e, +\u221e)", + 
"EventDate (-\u221e, +\u221e)", + "UserID (-\u221e, +\u221e)", + "WatchID (-\u221e, +\u221e)" + ], + "SsaProgram": { + "Command": [ + { + "Assign": { + "Column": { + "Id": 106 + }, + "Constant": { + "Int32": 0 + } + } + }, + { + "Assign": { + "Column": { + "Id": 107 + }, + "Function": { + "Arguments": [ + { + "Id": 41 + }, + { + "Id": 106 + } + ], + "FunctionType": 2, + "KernelIdx": 0, + "YqlOperationId": 12 + } + } + }, + { + "Filter": { + "Predicate": { + "Id": 107 + } + } + }, + { + "Projection": { + "Columns": [ + { + "Id": 41 + } + ] + } + } + ], + "Version": 4 + }, + "Table": "clickbench/plans/column/hits" + } + ], + "PlanNodeId": 7 + } + ] + } + ] + } + ] + } + ] + } + ] + }, "tables": [ { "name": "/local/clickbench/plans/column/hits", diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-8 b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-8 index 86e1bf71d158..22a7b1c8f85e 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-8 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-8 @@ -154,6 +154,109 @@ } ] }, + "SimplifiedPlan": { + "Node Type": "Query", + "PlanNodeId": 0, + "PlanNodeType": "Query", + "Plans": [ + { + "Node Type": "ResultSet", + "PlanNodeId": 1, + "PlanNodeType": "ResultSet", + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "10", + "Name": "Limit" + } + ], + "PlanNodeId": 2, + "Plans": [ + { + "Node Type": "TopSort", + "Operators": [ + { + "Limit": "10", + "Name": "TopSort", + "TopSortBy": "argument.Count0" + } + ], + "PlanNodeId": 4, + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Aggregation": "{_yql_agg_0: Inc(state._yql_agg_0)}", + "GroupBy": "item.RegionID", + "Name": "Aggregate" + } + ], + "PlanNodeId": 6, + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Aggregation": 
"state", + "GroupBy": "", + "Name": "Aggregate" + } + ], + "PlanNodeId": 8, + "Plans": [ + { + "Node Type": "TableFullScan", + "Operators": [ + { + "Name": "TableFullScan", + "ReadColumns": [ + "RegionID", + "UserID" + ], + "ReadRanges": [ + "EventTime (-\u221e, +\u221e)", + "CounterID (-\u221e, +\u221e)", + "EventDate (-\u221e, +\u221e)", + "UserID (-\u221e, +\u221e)", + "WatchID (-\u221e, +\u221e)" + ], + "SsaProgram": { + "Command": [ + { + "Projection": { + "Columns": [ + { + "Id": 9 + }, + { + "Id": 10 + } + ] + } + } + ], + "Version": 4 + }, + "Table": "clickbench/plans/column/hits" + } + ], + "PlanNodeId": 9 + } + ] + } + ] + } + ] + } + ] + } + ] + } + ] + }, "tables": [ { "name": "/local/clickbench/plans/column/hits", diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-9 b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-9 index dac074aa292f..690127ebbcd8 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-9 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_column_/queries-original-plan-column-9 @@ -258,6 +258,187 @@ } ] }, + "SimplifiedPlan": { + "Node Type": "Query", + "PlanNodeId": 0, + "PlanNodeType": "Query", + "Plans": [ + { + "Node Type": "ResultSet", + "PlanNodeId": 1, + "PlanNodeType": "ResultSet", + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "10", + "Name": "Limit" + } + ], + "PlanNodeId": 2, + "Plans": [ + { + "Node Type": "TopSort", + "Operators": [ + { + "Limit": "10", + "Name": "TopSort", + "TopSortBy": "argument.Count0" + } + ], + "PlanNodeId": 4, + "Plans": [ + { + "Node Type": "Union", + "Operators": [ + { + "Name": "Union" + } + ], + "PlanNodeId": 6, + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Aggregation": "{_yql_agg_1: SUM(state._yql_agg_1,1),_yql_agg_3: SUM(item.AdvEngineID,state._yql_agg_3)}", + "GroupBy": 
"item.RegionID", + "Name": "Aggregate" + } + ], + "PlanNodeId": 8, + "Plans": [ + { + "Node Type": "TableFullScan", + "Operators": [ + { + "Name": "TableFullScan", + "ReadColumns": [ + "AdvEngineID", + "RegionID", + "ResolutionWidth", + "UserID" + ], + "ReadRanges": [ + "EventTime (-\u221e, +\u221e)", + "CounterID (-\u221e, +\u221e)", + "EventDate (-\u221e, +\u221e)", + "UserID (-\u221e, +\u221e)", + "WatchID (-\u221e, +\u221e)" + ], + "SsaProgram": { + "Command": [ + { + "Projection": { + "Columns": [ + { + "Id": 41 + }, + { + "Id": 9 + }, + { + "Id": 21 + }, + { + "Id": 10 + } + ] + } + } + ], + "Version": 4 + }, + "Table": "clickbench/plans/column/hits" + } + ], + "PlanNodeId": 9 + } + ] + }, + { + "Node Type": "Aggregate", + "Operators": [ + { + "Aggregation": "{_yql_agg_2: Inc(state._yql_agg_2)}", + "GroupBy": "item.RegionID", + "Name": "Aggregate" + } + ], + "PlanNodeId": 11, + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Aggregation": "state", + "GroupBy": "", + "Name": "Aggregate" + } + ], + "PlanNodeId": 13, + "Plans": [ + { + "Node Type": "TableFullScan", + "Operators": [ + { + "Name": "TableFullScan", + "ReadColumns": [ + "AdvEngineID", + "RegionID", + "ResolutionWidth", + "UserID" + ], + "ReadRanges": [ + "EventTime (-\u221e, +\u221e)", + "CounterID (-\u221e, +\u221e)", + "EventDate (-\u221e, +\u221e)", + "UserID (-\u221e, +\u221e)", + "WatchID (-\u221e, +\u221e)" + ], + "SsaProgram": { + "Command": [ + { + "Projection": { + "Columns": [ + { + "Id": 41 + }, + { + "Id": 9 + }, + { + "Id": 21 + }, + { + "Id": 10 + } + ] + } + } + ], + "Version": 4 + }, + "Table": "clickbench/plans/column/hits" + } + ], + "PlanNodeId": 14 + } + ] + } + ] + } + ] + } + ] + } + ] + } + ] + } + ] + }, "tables": [ { "name": "/local/clickbench/plans/column/hits", diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-0 b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-0 
index bcea45f3e620..1ef611b809e2 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-0 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-0 @@ -101,6 +101,72 @@ } ] }, + "SimplifiedPlan": { + "Node Type": "Query", + "PlanNodeId": 0, + "PlanNodeType": "Query", + "Plans": [ + { + "Node Type": "ResultSet_1", + "PlanNodeId": 1, + "PlanNodeType": "ResultSet", + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Name": "Aggregate" + } + ], + "PlanNodeId": 4, + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "1", + "Name": "Limit" + } + ], + "PlanNodeId": 5, + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Name": "Aggregate" + } + ], + "PlanNodeId": 7, + "Plans": [ + { + "Node Type": "TableFullScan", + "Operators": [ + { + "Name": "TableFullScan", + "ReadColumns": null, + "ReadRanges": [ + "EventTime (-\u221e, +\u221e)", + "CounterID (-\u221e, +\u221e)", + "EventDate (-\u221e, +\u221e)", + "UserID (-\u221e, +\u221e)", + "WatchID (-\u221e, +\u221e)" + ], + "Table": "clickbench/plans/row/hits" + } + ], + "PlanNodeId": 8 + } + ] + } + ] + } + ] + } + ] + } + ] + }, "tables": [ { "name": "/local/clickbench/plans/row/hits", diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-1 b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-1 index 700d33e66beb..1d5ca56beefb 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-1 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-1 @@ -112,6 +112,86 @@ } ] }, + "SimplifiedPlan": { + "Node Type": "Query", + "PlanNodeId": 0, + "PlanNodeType": "Query", + "Plans": [ + { + "Node Type": "ResultSet_1", + "PlanNodeId": 1, + "PlanNodeType": "ResultSet", + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + 
"Name": "Aggregate" + } + ], + "PlanNodeId": 4, + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "1", + "Name": "Limit" + } + ], + "PlanNodeId": 5, + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Name": "Aggregate" + } + ], + "PlanNodeId": 7, + "Plans": [ + { + "Node Type": "Filter", + "Operators": [ + { + "Name": "Filter", + "Predicate": "item.AdvEngineID != 0" + } + ], + "PlanNodeId": 8, + "Plans": [ + { + "Node Type": "TableFullScan", + "Operators": [ + { + "Name": "TableFullScan", + "ReadColumns": [ + "AdvEngineID" + ], + "ReadRanges": [ + "EventTime (-\u221e, +\u221e)", + "CounterID (-\u221e, +\u221e)", + "EventDate (-\u221e, +\u221e)", + "UserID (-\u221e, +\u221e)", + "WatchID (-\u221e, +\u221e)" + ], + "Table": "clickbench/plans/row/hits" + } + ], + "PlanNodeId": 9 + } + ] + } + ] + } + ] + } + ] + } + ] + } + ] + }, "tables": [ { "name": "/local/clickbench/plans/row/hits", diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-10 b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-10 index 6ce50d447f4f..9101e37d5a23 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-10 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-10 @@ -146,6 +146,104 @@ } ] }, + "SimplifiedPlan": { + "Node Type": "Query", + "PlanNodeId": 0, + "PlanNodeType": "Query", + "Plans": [ + { + "Node Type": "ResultSet", + "PlanNodeId": 1, + "PlanNodeType": "ResultSet", + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "10", + "Name": "Limit" + } + ], + "PlanNodeId": 2, + "Plans": [ + { + "Node Type": "TopSort", + "Operators": [ + { + "Limit": "10", + "Name": "TopSort", + "TopSortBy": "argument.Count0" + } + ], + "PlanNodeId": 4, + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Aggregation": "{_yql_agg_0: 
COUNT(item.UserID,state._yql_agg_0)}", + "GroupBy": "item.MobilePhoneModel", + "Name": "Aggregate" + } + ], + "PlanNodeId": 6, + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Aggregation": "state", + "GroupBy": "", + "Name": "Aggregate" + } + ], + "PlanNodeId": 8, + "Plans": [ + { + "Node Type": "Filter", + "Operators": [ + { + "Name": "Filter", + "Predicate": "item.MobilePhoneModel != \"\"" + } + ], + "PlanNodeId": 9, + "Plans": [ + { + "Node Type": "TableFullScan", + "Operators": [ + { + "Name": "TableFullScan", + "ReadColumns": [ + "MobilePhoneModel", + "UserID" + ], + "ReadRanges": [ + "EventTime (-\u221e, +\u221e)", + "CounterID (-\u221e, +\u221e)", + "EventDate (-\u221e, +\u221e)", + "UserID (-\u221e, +\u221e)", + "WatchID (-\u221e, +\u221e)" + ], + "Table": "clickbench/plans/row/hits" + } + ], + "PlanNodeId": 10 + } + ] + } + ] + } + ] + } + ] + } + ] + } + ] + } + ] + }, "tables": [ { "name": "/local/clickbench/plans/row/hits", diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-11 b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-11 index 891d1590f987..8283b6313d04 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-11 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-11 @@ -149,6 +149,105 @@ } ] }, + "SimplifiedPlan": { + "Node Type": "Query", + "PlanNodeId": 0, + "PlanNodeType": "Query", + "Plans": [ + { + "Node Type": "ResultSet", + "PlanNodeId": 1, + "PlanNodeType": "ResultSet", + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "10", + "Name": "Limit" + } + ], + "PlanNodeId": 2, + "Plans": [ + { + "Node Type": "TopSort", + "Operators": [ + { + "Limit": "10", + "Name": "TopSort", + "TopSortBy": "argument.Count0" + } + ], + "PlanNodeId": 4, + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Aggregation": 
"{_yql_agg_0: COUNT(item.UserID,state._yql_agg_0)}", + "GroupBy": "", + "Name": "Aggregate" + } + ], + "PlanNodeId": 6, + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Aggregation": "state", + "GroupBy": "", + "Name": "Aggregate" + } + ], + "PlanNodeId": 8, + "Plans": [ + { + "Node Type": "Filter", + "Operators": [ + { + "Name": "Filter", + "Predicate": "item.MobilePhoneModel != \"\"" + } + ], + "PlanNodeId": 9, + "Plans": [ + { + "Node Type": "TableFullScan", + "Operators": [ + { + "Name": "TableFullScan", + "ReadColumns": [ + "MobilePhone", + "MobilePhoneModel", + "UserID" + ], + "ReadRanges": [ + "EventTime (-\u221e, +\u221e)", + "CounterID (-\u221e, +\u221e)", + "EventDate (-\u221e, +\u221e)", + "UserID (-\u221e, +\u221e)", + "WatchID (-\u221e, +\u221e)" + ], + "Table": "clickbench/plans/row/hits" + } + ], + "PlanNodeId": 10 + } + ] + } + ] + } + ] + } + ] + } + ] + } + ] + } + ] + }, "tables": [ { "name": "/local/clickbench/plans/row/hits", diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-12 b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-12 index e57e1dae5f09..e182d61c7208 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-12 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-12 @@ -116,6 +116,90 @@ } ] }, + "SimplifiedPlan": { + "Node Type": "Query", + "PlanNodeId": 0, + "PlanNodeType": "Query", + "Plans": [ + { + "Node Type": "ResultSet", + "PlanNodeId": 1, + "PlanNodeType": "ResultSet", + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "10", + "Name": "Limit" + } + ], + "PlanNodeId": 2, + "Plans": [ + { + "Node Type": "TopSort", + "Operators": [ + { + "Limit": "10", + "Name": "TopSort", + "TopSortBy": "argument.Count0" + } + ], + "PlanNodeId": 4, + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + 
"Aggregation": "{_yql_agg_0: SUM(state._yql_agg_0,1)}", + "GroupBy": "item.SearchPhrase", + "Name": "Aggregate" + } + ], + "PlanNodeId": 6, + "Plans": [ + { + "Node Type": "Filter", + "Operators": [ + { + "Name": "Filter", + "Predicate": "item.SearchPhrase != \"\"" + } + ], + "PlanNodeId": 7, + "Plans": [ + { + "Node Type": "TableFullScan", + "Operators": [ + { + "Name": "TableFullScan", + "ReadColumns": [ + "SearchPhrase" + ], + "ReadRanges": [ + "EventTime (-\u221e, +\u221e)", + "CounterID (-\u221e, +\u221e)", + "EventDate (-\u221e, +\u221e)", + "UserID (-\u221e, +\u221e)", + "WatchID (-\u221e, +\u221e)" + ], + "Table": "clickbench/plans/row/hits" + } + ], + "PlanNodeId": 8 + } + ] + } + ] + } + ] + } + ] + } + ] + } + ] + }, "tables": [ { "name": "/local/clickbench/plans/row/hits", diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-13 b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-13 index 1a7720d827c8..94f1e09a4096 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-13 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-13 @@ -146,6 +146,104 @@ } ] }, + "SimplifiedPlan": { + "Node Type": "Query", + "PlanNodeId": 0, + "PlanNodeType": "Query", + "Plans": [ + { + "Node Type": "ResultSet", + "PlanNodeId": 1, + "PlanNodeType": "ResultSet", + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "10", + "Name": "Limit" + } + ], + "PlanNodeId": 2, + "Plans": [ + { + "Node Type": "TopSort", + "Operators": [ + { + "Limit": "10", + "Name": "TopSort", + "TopSortBy": "argument.Count0" + } + ], + "PlanNodeId": 4, + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Aggregation": "{_yql_agg_0: COUNT(item.UserID,state._yql_agg_0)}", + "GroupBy": "item.SearchPhrase", + "Name": "Aggregate" + } + ], + "PlanNodeId": 6, + "Plans": [ + { + "Node Type": "Aggregate", 
+ "Operators": [ + { + "Aggregation": "state", + "GroupBy": "", + "Name": "Aggregate" + } + ], + "PlanNodeId": 8, + "Plans": [ + { + "Node Type": "Filter", + "Operators": [ + { + "Name": "Filter", + "Predicate": "item.SearchPhrase != \"\"" + } + ], + "PlanNodeId": 9, + "Plans": [ + { + "Node Type": "TableFullScan", + "Operators": [ + { + "Name": "TableFullScan", + "ReadColumns": [ + "SearchPhrase", + "UserID" + ], + "ReadRanges": [ + "EventTime (-\u221e, +\u221e)", + "CounterID (-\u221e, +\u221e)", + "EventDate (-\u221e, +\u221e)", + "UserID (-\u221e, +\u221e)", + "WatchID (-\u221e, +\u221e)" + ], + "Table": "clickbench/plans/row/hits" + } + ], + "PlanNodeId": 10 + } + ] + } + ] + } + ] + } + ] + } + ] + } + ] + } + ] + }, "tables": [ { "name": "/local/clickbench/plans/row/hits", diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-14 b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-14 index 933bb09440a4..1e7c671f669a 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-14 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-14 @@ -118,6 +118,91 @@ } ] }, + "SimplifiedPlan": { + "Node Type": "Query", + "PlanNodeId": 0, + "PlanNodeType": "Query", + "Plans": [ + { + "Node Type": "ResultSet", + "PlanNodeId": 1, + "PlanNodeType": "ResultSet", + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "10", + "Name": "Limit" + } + ], + "PlanNodeId": 2, + "Plans": [ + { + "Node Type": "TopSort", + "Operators": [ + { + "Limit": "10", + "Name": "TopSort", + "TopSortBy": "argument.Count0" + } + ], + "PlanNodeId": 4, + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Aggregation": "{_yql_agg_0: SUM(state._yql_agg_0,1)}", + "GroupBy": "", + "Name": "Aggregate" + } + ], + "PlanNodeId": 6, + "Plans": [ + { + "Node Type": "Filter", + "Operators": [ + { + "Name": 
"Filter", + "Predicate": "item.SearchPhrase != \"\"" + } + ], + "PlanNodeId": 7, + "Plans": [ + { + "Node Type": "TableFullScan", + "Operators": [ + { + "Name": "TableFullScan", + "ReadColumns": [ + "SearchEngineID", + "SearchPhrase" + ], + "ReadRanges": [ + "EventTime (-\u221e, +\u221e)", + "CounterID (-\u221e, +\u221e)", + "EventDate (-\u221e, +\u221e)", + "UserID (-\u221e, +\u221e)", + "WatchID (-\u221e, +\u221e)" + ], + "Table": "clickbench/plans/row/hits" + } + ], + "PlanNodeId": 8 + } + ] + } + ] + } + ] + } + ] + } + ] + } + ] + }, "tables": [ { "name": "/local/clickbench/plans/row/hits", diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-15 b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-15 index 0ae0d6d90222..e390b499b0f4 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-15 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-15 @@ -107,6 +107,78 @@ } ] }, + "SimplifiedPlan": { + "Node Type": "Query", + "PlanNodeId": 0, + "PlanNodeType": "Query", + "Plans": [ + { + "Node Type": "ResultSet", + "PlanNodeId": 1, + "PlanNodeType": "ResultSet", + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "10", + "Name": "Limit" + } + ], + "PlanNodeId": 2, + "Plans": [ + { + "Node Type": "TopSort", + "Operators": [ + { + "Limit": "10", + "Name": "TopSort", + "TopSortBy": "argument.Count0" + } + ], + "PlanNodeId": 4, + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Aggregation": "{_yql_agg_0: SUM(state._yql_agg_0,1)}", + "GroupBy": "item.UserID", + "Name": "Aggregate" + } + ], + "PlanNodeId": 6, + "Plans": [ + { + "Node Type": "TableFullScan", + "Operators": [ + { + "Name": "TableFullScan", + "ReadColumns": [ + "UserID" + ], + "ReadRanges": [ + "EventTime (-\u221e, +\u221e)", + "CounterID (-\u221e, +\u221e)", + "EventDate (-\u221e, +\u221e)", + 
"UserID (-\u221e, +\u221e)", + "WatchID (-\u221e, +\u221e)" + ], + "Table": "clickbench/plans/row/hits" + } + ], + "PlanNodeId": 7 + } + ] + } + ] + } + ] + } + ] + } + ] + }, "tables": [ { "name": "/local/clickbench/plans/row/hits", diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-16 b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-16 index 2414ba574dab..71b4672498d7 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-16 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-16 @@ -109,6 +109,79 @@ } ] }, + "SimplifiedPlan": { + "Node Type": "Query", + "PlanNodeId": 0, + "PlanNodeType": "Query", + "Plans": [ + { + "Node Type": "ResultSet", + "PlanNodeId": 1, + "PlanNodeType": "ResultSet", + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "10", + "Name": "Limit" + } + ], + "PlanNodeId": 2, + "Plans": [ + { + "Node Type": "TopSort", + "Operators": [ + { + "Limit": "10", + "Name": "TopSort", + "TopSortBy": "argument.Count0" + } + ], + "PlanNodeId": 4, + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Aggregation": "{_yql_agg_0: SUM(state._yql_agg_0,1)}", + "GroupBy": "", + "Name": "Aggregate" + } + ], + "PlanNodeId": 6, + "Plans": [ + { + "Node Type": "TableFullScan", + "Operators": [ + { + "Name": "TableFullScan", + "ReadColumns": [ + "SearchPhrase", + "UserID" + ], + "ReadRanges": [ + "EventTime (-\u221e, +\u221e)", + "CounterID (-\u221e, +\u221e)", + "EventDate (-\u221e, +\u221e)", + "UserID (-\u221e, +\u221e)", + "WatchID (-\u221e, +\u221e)" + ], + "Table": "clickbench/plans/row/hits" + } + ], + "PlanNodeId": 7 + } + ] + } + ] + } + ] + } + ] + } + ] + }, "tables": [ { "name": "/local/clickbench/plans/row/hits", diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-17 
b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-17 index 5ba6811b0e9b..f0667dbd949d 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-17 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-17 @@ -105,6 +105,78 @@ } ] }, + "SimplifiedPlan": { + "Node Type": "Query", + "PlanNodeId": 0, + "PlanNodeType": "Query", + "Plans": [ + { + "Node Type": "ResultSet", + "PlanNodeId": 1, + "PlanNodeType": "ResultSet", + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "10", + "Name": "Limit" + } + ], + "PlanNodeId": 2, + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "10", + "Name": "Limit" + } + ], + "PlanNodeId": 4, + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Aggregation": "{_yql_agg_0: SUM(state._yql_agg_0,1)}", + "GroupBy": "", + "Name": "Aggregate" + } + ], + "PlanNodeId": 6, + "Plans": [ + { + "Node Type": "TableFullScan", + "Operators": [ + { + "Name": "TableFullScan", + "ReadColumns": [ + "SearchPhrase", + "UserID" + ], + "ReadRanges": [ + "EventTime (-\u221e, +\u221e)", + "CounterID (-\u221e, +\u221e)", + "EventDate (-\u221e, +\u221e)", + "UserID (-\u221e, +\u221e)", + "WatchID (-\u221e, +\u221e)" + ], + "Table": "clickbench/plans/row/hits" + } + ], + "PlanNodeId": 7 + } + ] + } + ] + } + ] + } + ] + } + ] + }, "tables": [ { "name": "/local/clickbench/plans/row/hits", diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-18 b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-18 index 15fc4d053fd4..74c39336831f 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-18 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-18 @@ -111,6 +111,80 @@ } ] }, + "SimplifiedPlan": { + "Node Type": 
"Query", + "PlanNodeId": 0, + "PlanNodeType": "Query", + "Plans": [ + { + "Node Type": "ResultSet", + "PlanNodeId": 1, + "PlanNodeType": "ResultSet", + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "10", + "Name": "Limit" + } + ], + "PlanNodeId": 2, + "Plans": [ + { + "Node Type": "TopSort", + "Operators": [ + { + "Limit": "10", + "Name": "TopSort", + "TopSortBy": "argument.Count0" + } + ], + "PlanNodeId": 4, + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Aggregation": "{_yql_agg_0: SUM(state._yql_agg_0,1)}", + "GroupBy": "", + "Name": "Aggregate" + } + ], + "PlanNodeId": 6, + "Plans": [ + { + "Node Type": "TableFullScan", + "Operators": [ + { + "Name": "TableFullScan", + "ReadColumns": [ + "EventTime", + "SearchPhrase", + "UserID" + ], + "ReadRanges": [ + "EventTime (-\u221e, +\u221e)", + "CounterID (-\u221e, +\u221e)", + "EventDate (-\u221e, +\u221e)", + "UserID (-\u221e, +\u221e)", + "WatchID (-\u221e, +\u221e)" + ], + "Table": "clickbench/plans/row/hits" + } + ], + "PlanNodeId": 7 + } + ] + } + ] + } + ] + } + ] + } + ] + }, "tables": [ { "name": "/local/clickbench/plans/row/hits", diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-19 b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-19 index 22d67d5a22af..c601c9a97d3f 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-19 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-19 @@ -84,6 +84,76 @@ } ] }, + "SimplifiedPlan": { + "Node Type": "Query", + "PlanNodeId": 0, + "PlanNodeType": "Query", + "Plans": [ + { + "Node Type": "ResultSet", + "PlanNodeId": 1, + "PlanNodeType": "ResultSet", + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "1001", + "Name": "Limit" + } + ], + "PlanNodeId": 2, + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": 
"1001", + "Name": "Limit" + } + ], + "PlanNodeId": 4, + "Plans": [ + { + "Node Type": "Filter", + "Operators": [ + { + "Name": "Filter", + "Predicate": "item.UserID == 435090932899640449" + } + ], + "PlanNodeId": 5, + "Plans": [ + { + "Node Type": "TableFullScan", + "Operators": [ + { + "Name": "TableFullScan", + "ReadColumns": [ + "UserID" + ], + "ReadRanges": [ + "EventTime (-\u221e, +\u221e)", + "CounterID (-\u221e, +\u221e)", + "EventDate (-\u221e, +\u221e)", + "UserID (-\u221e, +\u221e)", + "WatchID (-\u221e, +\u221e)" + ], + "Table": "clickbench/plans/row/hits" + } + ], + "PlanNodeId": 6 + } + ] + } + ] + } + ] + } + ] + } + ] + }, "tables": [ { "name": "/local/clickbench/plans/row/hits", diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-2 b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-2 index de6f498faae5..8ccfa321c215 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-2 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-2 @@ -16,12 +16,21 @@ "Plans": [ { "CTE Name": "precompute_0_0", - "Node Type": "ConstantExpr", + "Node Type": "ConstantExpr-ConstantExpr", "Operators": [ { - "Inputs": [], + "Inputs": [ + { + "InternalOperatorId": 1 + } + ], "Iterator": "precompute_0_0", "Name": "Iterator" + }, + { + "Inputs": [], + "Member": "precompute_0_0", + "Name": "Member" } ], "PlanNodeId": 6 @@ -104,6 +113,130 @@ } ] }, + "SimplifiedPlan": { + "Node Type": "Query", + "PlanNodeId": 0, + "PlanNodeType": "Query", + "Plans": [ + { + "Node Type": "ResultSet_1", + "PlanNodeId": 1, + "PlanNodeType": "ResultSet", + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Name": "Aggregate" + } + ], + "PlanNodeId": 5, + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "1", + "Name": "Limit" + } + ], + "PlanNodeId": 6, + "Plans": [ + { + "Node Type": 
"Aggregate", + "Operators": [ + { + "Name": "Aggregate" + } + ], + "PlanNodeId": 8, + "Plans": [ + { + "Node Type": "TableFullScan", + "Operators": [ + { + "Name": "TableFullScan", + "ReadColumns": [ + "AdvEngineID", + "ResolutionWidth" + ], + "ReadRanges": [ + "EventTime (-\u221e, +\u221e)", + "CounterID (-\u221e, +\u221e)", + "EventDate (-\u221e, +\u221e)", + "UserID (-\u221e, +\u221e)", + "WatchID (-\u221e, +\u221e)" + ], + "Table": "clickbench/plans/row/hits" + } + ], + "PlanNodeId": 9 + } + ] + } + ] + } + ] + }, + { + "Node Type": "Aggregate", + "Operators": [ + { + "Name": "Aggregate" + } + ], + "PlanNodeId": 11, + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "1", + "Name": "Limit" + } + ], + "PlanNodeId": 12, + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Name": "Aggregate" + } + ], + "PlanNodeId": 14, + "Plans": [ + { + "Node Type": "TableFullScan", + "Operators": [ + { + "Name": "TableFullScan", + "ReadColumns": [ + "AdvEngineID", + "ResolutionWidth" + ], + "ReadRanges": [ + "EventTime (-\u221e, +\u221e)", + "CounterID (-\u221e, +\u221e)", + "EventDate (-\u221e, +\u221e)", + "UserID (-\u221e, +\u221e)", + "WatchID (-\u221e, +\u221e)" + ], + "Table": "clickbench/plans/row/hits" + } + ], + "PlanNodeId": 15 + } + ] + } + ] + } + ] + } + ] + } + ] + }, "tables": [ { "name": "/local/clickbench/plans/row/hits", diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-20 b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-20 index 08d691dc90cc..6b0f6f5c370a 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-20 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-20 @@ -112,6 +112,86 @@ } ] }, + "SimplifiedPlan": { + "Node Type": "Query", + "PlanNodeId": 0, + "PlanNodeType": "Query", + "Plans": [ + { + "Node Type": "ResultSet_1", + 
"PlanNodeId": 1, + "PlanNodeType": "ResultSet", + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Name": "Aggregate" + } + ], + "PlanNodeId": 4, + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "1", + "Name": "Limit" + } + ], + "PlanNodeId": 5, + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Name": "Aggregate" + } + ], + "PlanNodeId": 7, + "Plans": [ + { + "Node Type": "Filter", + "Operators": [ + { + "Name": "Filter", + "Predicate": "item.URL StringContains \"google\"" + } + ], + "PlanNodeId": 8, + "Plans": [ + { + "Node Type": "TableFullScan", + "Operators": [ + { + "Name": "TableFullScan", + "ReadColumns": [ + "URL" + ], + "ReadRanges": [ + "EventTime (-\u221e, +\u221e)", + "CounterID (-\u221e, +\u221e)", + "EventDate (-\u221e, +\u221e)", + "UserID (-\u221e, +\u221e)", + "WatchID (-\u221e, +\u221e)" + ], + "Table": "clickbench/plans/row/hits" + } + ], + "PlanNodeId": 9 + } + ] + } + ] + } + ] + } + ] + } + ] + } + ] + }, "tables": [ { "name": "/local/clickbench/plans/row/hits", diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-21 b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-21 index 8db645ce92d5..3d4a095eb9d5 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-21 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-21 @@ -117,6 +117,91 @@ } ] }, + "SimplifiedPlan": { + "Node Type": "Query", + "PlanNodeId": 0, + "PlanNodeType": "Query", + "Plans": [ + { + "Node Type": "ResultSet", + "PlanNodeId": 1, + "PlanNodeType": "ResultSet", + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "10", + "Name": "Limit" + } + ], + "PlanNodeId": 2, + "Plans": [ + { + "Node Type": "TopSort", + "Operators": [ + { + "Limit": "10", + "Name": "TopSort", + "TopSortBy": "argument.Count0" + } + ], + "PlanNodeId": 4, + 
"Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Aggregation": "{_yql_agg_0: SUM(state._yql_agg_0,1),_yql_agg_1: MIN(item.URL,state._yql_agg_1)}", + "GroupBy": "item.SearchPhrase", + "Name": "Aggregate" + } + ], + "PlanNodeId": 6, + "Plans": [ + { + "Node Type": "Filter", + "Operators": [ + { + "Name": "Filter", + "Predicate": "item.URL StringContains \"google\" And item.SearchPhrase != \"\"" + } + ], + "PlanNodeId": 7, + "Plans": [ + { + "Node Type": "TableFullScan", + "Operators": [ + { + "Name": "TableFullScan", + "ReadColumns": [ + "SearchPhrase", + "URL" + ], + "ReadRanges": [ + "EventTime (-\u221e, +\u221e)", + "CounterID (-\u221e, +\u221e)", + "EventDate (-\u221e, +\u221e)", + "UserID (-\u221e, +\u221e)", + "WatchID (-\u221e, +\u221e)" + ], + "Table": "clickbench/plans/row/hits" + } + ], + "PlanNodeId": 8 + } + ] + } + ] + } + ] + } + ] + } + ] + } + ] + }, "tables": [ { "name": "/local/clickbench/plans/row/hits", diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-22 b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-22 index d01b0792517b..a7b9fc8a11e6 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-22 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-22 @@ -230,6 +230,165 @@ } ] }, + "SimplifiedPlan": { + "Node Type": "Query", + "PlanNodeId": 0, + "PlanNodeType": "Query", + "Plans": [ + { + "Node Type": "ResultSet", + "PlanNodeId": 1, + "PlanNodeType": "ResultSet", + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "10", + "Name": "Limit" + } + ], + "PlanNodeId": 2, + "Plans": [ + { + "Node Type": "TopSort", + "Operators": [ + { + "Limit": "10", + "Name": "TopSort", + "TopSortBy": "argument.Count0" + } + ], + "PlanNodeId": 4, + "Plans": [ + { + "Node Type": "Union", + "Operators": [ + { + "Name": "Union" + } + ], + "PlanNodeId": 6, 
+ "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Aggregation": "{_yql_agg_0: SUM(state._yql_agg_0,1),_yql_agg_2: MIN(item.URL,state._yql_agg_2),_yql_agg_3: MIN(item.Title,state._yql_agg_3)}", + "GroupBy": "item.SearchPhrase", + "Name": "Aggregate" + } + ], + "PlanNodeId": 8, + "Plans": [ + { + "Node Type": "Filter", + "Operators": [ + { + "Name": "Filter", + "Predicate": "item.Title StringContains \"Google\" And Not item.URL StringContains \".google.\" And item.SearchPhrase != \"\"" + } + ], + "PlanNodeId": 9, + "Plans": [ + { + "Node Type": "TableFullScan", + "Operators": [ + { + "Name": "TableFullScan", + "ReadColumns": [ + "SearchPhrase", + "Title", + "URL", + "UserID" + ], + "ReadRanges": [ + "EventTime (-\u221e, +\u221e)", + "CounterID (-\u221e, +\u221e)", + "EventDate (-\u221e, +\u221e)", + "UserID (-\u221e, +\u221e)", + "WatchID (-\u221e, +\u221e)" + ], + "Table": "clickbench/plans/row/hits" + } + ], + "PlanNodeId": 10 + } + ] + } + ] + }, + { + "Node Type": "Aggregate", + "Operators": [ + { + "Aggregation": "{_yql_agg_1: COUNT(item.UserID,state._yql_agg_1)}", + "GroupBy": "item.SearchPhrase", + "Name": "Aggregate" + } + ], + "PlanNodeId": 12, + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Aggregation": "state", + "GroupBy": "", + "Name": "Aggregate" + } + ], + "PlanNodeId": 14, + "Plans": [ + { + "Node Type": "Filter", + "Operators": [ + { + "Name": "Filter", + "Predicate": "item.Title StringContains \"Google\" And Not item.URL StringContains \".google.\" And item.SearchPhrase != \"\"" + } + ], + "PlanNodeId": 15, + "Plans": [ + { + "Node Type": "TableFullScan", + "Operators": [ + { + "Name": "TableFullScan", + "ReadColumns": [ + "SearchPhrase", + "Title", + "URL", + "UserID" + ], + "ReadRanges": [ + "EventTime (-\u221e, +\u221e)", + "CounterID (-\u221e, +\u221e)", + "EventDate (-\u221e, +\u221e)", + "UserID (-\u221e, +\u221e)", + "WatchID (-\u221e, +\u221e)" + ], + "Table": "clickbench/plans/row/hits" + } + ], + 
"PlanNodeId": 16 + } + ] + } + ] + } + ] + } + ] + } + ] + } + ] + } + ] + } + ] + }, "tables": [ { "name": "/local/clickbench/plans/row/hits", diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-23 b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-23 index 523cdda7b3ef..432897d377d0 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-23 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-23 @@ -191,6 +191,180 @@ } ] }, + "SimplifiedPlan": { + "Node Type": "Query", + "PlanNodeId": 0, + "PlanNodeType": "Query", + "Plans": [ + { + "Node Type": "ResultSet", + "PlanNodeId": 1, + "PlanNodeType": "ResultSet", + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "10", + "Name": "Limit" + } + ], + "PlanNodeId": 2, + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "10", + "Name": "Limit" + } + ], + "PlanNodeId": 4, + "Plans": [ + { + "Node Type": "Filter", + "Operators": [ + { + "Name": "Filter", + "Predicate": "item.URL StringContains \"google\"" + } + ], + "PlanNodeId": 5, + "Plans": [ + { + "Node Type": "TableFullScan", + "Operators": [ + { + "Name": "TableFullScan", + "ReadColumns": [ + "AdvEngineID", + "Age", + "BrowserCountry", + "BrowserLanguage", + "CLID", + "ClientEventTime", + "ClientIP", + "ClientTimeZone", + "CodeVersion", + "ConnectTiming", + "CookieEnable", + "CounterClass", + "CounterID", + "DNSTiming", + "DontCountHits", + "EventDate", + "EventTime", + "FUniqID", + "FetchTiming", + "FlashMajor", + "FlashMinor", + "FlashMinor2", + "FromTag", + "GoodEvent", + "HID", + "HTTPError", + "HasGCLID", + "HistoryLength", + "HitColor", + "IPNetworkID", + "Income", + "Interests", + "IsArtifical", + "IsDownload", + "IsEvent", + "IsLink", + "IsMobile", + "IsNotBounce", + "IsOldCounter", + "IsParameter", + "IsRefresh", + "JavaEnable", + 
"JavascriptEnable", + "LocalEventTime", + "MobilePhone", + "MobilePhoneModel", + "NetMajor", + "NetMinor", + "OS", + "OpenerName", + "OpenstatAdID", + "OpenstatCampaignID", + "OpenstatServiceName", + "OpenstatSourceID", + "OriginalURL", + "PageCharset", + "ParamCurrency", + "ParamCurrencyID", + "ParamOrderID", + "ParamPrice", + "Params", + "Referer", + "RefererCategoryID", + "RefererHash", + "RefererRegionID", + "RegionID", + "RemoteIP", + "ResolutionDepth", + "ResolutionHeight", + "ResolutionWidth", + "ResponseEndTiming", + "ResponseStartTiming", + "Robotness", + "SearchEngineID", + "SearchPhrase", + "SendTiming", + "Sex", + "SilverlightVersion1", + "SilverlightVersion2", + "SilverlightVersion3", + "SilverlightVersion4", + "SocialAction", + "SocialNetwork", + "SocialSourceNetworkID", + "SocialSourcePage", + "Title", + "TraficSourceID", + "URL", + "URLCategoryID", + "URLHash", + "URLRegionID", + "UTMCampaign", + "UTMContent", + "UTMMedium", + "UTMSource", + "UTMTerm", + "UserAgent", + "UserAgentMajor", + "UserAgentMinor", + "UserID", + "WatchID", + "WindowClientHeight", + "WindowClientWidth", + "WindowName", + "WithHash" + ], + "ReadRanges": [ + "EventTime (-\u221e, +\u221e)", + "CounterID (-\u221e, +\u221e)", + "EventDate (-\u221e, +\u221e)", + "UserID (-\u221e, +\u221e)", + "WatchID (-\u221e, +\u221e)" + ], + "Table": "clickbench/plans/row/hits" + } + ], + "PlanNodeId": 6 + } + ] + } + ] + } + ] + } + ] + } + ] + }, "tables": [ { "name": "/local/clickbench/plans/row/hits", diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-24 b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-24 index aafd111108fa..1d61e93ad9a8 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-24 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-24 @@ -88,6 +88,77 @@ } ] }, + "SimplifiedPlan": { + "Node Type": 
"Query", + "PlanNodeId": 0, + "PlanNodeType": "Query", + "Plans": [ + { + "Node Type": "ResultSet", + "PlanNodeId": 1, + "PlanNodeType": "ResultSet", + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "10", + "Name": "Limit" + } + ], + "PlanNodeId": 2, + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "10", + "Name": "Limit" + } + ], + "PlanNodeId": 4, + "Plans": [ + { + "Node Type": "Filter", + "Operators": [ + { + "Name": "Filter", + "Predicate": "item.SearchPhrase != \"\"" + } + ], + "PlanNodeId": 5, + "Plans": [ + { + "Node Type": "TableFullScan", + "Operators": [ + { + "Name": "TableFullScan", + "ReadColumns": [ + "EventTime", + "SearchPhrase" + ], + "ReadRanges": [ + "EventTime (-\u221e, +\u221e)", + "CounterID (-\u221e, +\u221e)", + "EventDate (-\u221e, +\u221e)", + "UserID (-\u221e, +\u221e)", + "WatchID (-\u221e, +\u221e)" + ], + "Table": "clickbench/plans/row/hits" + } + ], + "PlanNodeId": 6 + } + ] + } + ] + } + ] + } + ] + } + ] + }, "tables": [ { "name": "/local/clickbench/plans/row/hits", diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-25 b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-25 index 8b02cd015639..367bb349f1b4 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-25 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-25 @@ -88,6 +88,77 @@ } ] }, + "SimplifiedPlan": { + "Node Type": "Query", + "PlanNodeId": 0, + "PlanNodeType": "Query", + "Plans": [ + { + "Node Type": "ResultSet", + "PlanNodeId": 1, + "PlanNodeType": "ResultSet", + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "10", + "Name": "Limit" + } + ], + "PlanNodeId": 2, + "Plans": [ + { + "Node Type": "TopSort", + "Operators": [ + { + "Limit": "10", + "Name": "TopSort", + "TopSortBy": "$5.SearchPhrase" + } + ], + "PlanNodeId": 4, 
+ "Plans": [ + { + "Node Type": "Filter", + "Operators": [ + { + "Name": "Filter", + "Predicate": "item.SearchPhrase != \"\"" + } + ], + "PlanNodeId": 5, + "Plans": [ + { + "Node Type": "TableFullScan", + "Operators": [ + { + "Name": "TableFullScan", + "ReadColumns": [ + "SearchPhrase" + ], + "ReadRanges": [ + "EventTime (-\u221e, +\u221e)", + "CounterID (-\u221e, +\u221e)", + "EventDate (-\u221e, +\u221e)", + "UserID (-\u221e, +\u221e)", + "WatchID (-\u221e, +\u221e)" + ], + "Table": "clickbench/plans/row/hits" + } + ], + "PlanNodeId": 6 + } + ] + } + ] + } + ] + } + ] + } + ] + }, "tables": [ { "name": "/local/clickbench/plans/row/hits", diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-26 b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-26 index 73abb1c271f6..2ba90d4d2238 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-26 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-26 @@ -90,6 +90,78 @@ } ] }, + "SimplifiedPlan": { + "Node Type": "Query", + "PlanNodeId": 0, + "PlanNodeType": "Query", + "Plans": [ + { + "Node Type": "ResultSet", + "PlanNodeId": 1, + "PlanNodeType": "ResultSet", + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "10", + "Name": "Limit" + } + ], + "PlanNodeId": 2, + "Plans": [ + { + "Node Type": "TopSort", + "Operators": [ + { + "Limit": "10", + "Name": "TopSort", + "TopSortBy": "" + } + ], + "PlanNodeId": 4, + "Plans": [ + { + "Node Type": "Filter", + "Operators": [ + { + "Name": "Filter", + "Predicate": "item.SearchPhrase != \"\"" + } + ], + "PlanNodeId": 5, + "Plans": [ + { + "Node Type": "TableFullScan", + "Operators": [ + { + "Name": "TableFullScan", + "ReadColumns": [ + "EventTime", + "SearchPhrase" + ], + "ReadRanges": [ + "EventTime (-\u221e, +\u221e)", + "CounterID (-\u221e, +\u221e)", + "EventDate (-\u221e, +\u221e)", + 
"UserID (-\u221e, +\u221e)", + "WatchID (-\u221e, +\u221e)" + ], + "Table": "clickbench/plans/row/hits" + } + ], + "PlanNodeId": 6 + } + ] + } + ] + } + ] + } + ] + } + ] + }, "tables": [ { "name": "/local/clickbench/plans/row/hits", diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-27 b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-27 index 0578a55fa92d..afca1b848d53 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-27 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-27 @@ -126,6 +126,103 @@ } ] }, + "SimplifiedPlan": { + "Node Type": "Query", + "PlanNodeId": 0, + "PlanNodeType": "Query", + "Plans": [ + { + "Node Type": "ResultSet", + "PlanNodeId": 1, + "PlanNodeType": "ResultSet", + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "25", + "Name": "Limit" + } + ], + "PlanNodeId": 2, + "Plans": [ + { + "Node Type": "TopSort", + "Operators": [ + { + "Limit": "25", + "Name": "TopSort", + "TopSortBy": "$14.l" + } + ], + "PlanNodeId": 4, + "Plans": [ + { + "Node Type": "Filter", + "Operators": [ + { + "Name": "Filter", + "Predicate": "item.Count0 > 100000" + } + ], + "PlanNodeId": 5, + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Aggregation": "{_yql_agg_1: SUM(state._yql_agg_1,1)}", + "GroupBy": "item.CounterID", + "Name": "Aggregate" + } + ], + "PlanNodeId": 7, + "Plans": [ + { + "Node Type": "Filter", + "Operators": [ + { + "Name": "Filter", + "Predicate": "item.URL != \"\"" + } + ], + "PlanNodeId": 8, + "Plans": [ + { + "Node Type": "TableFullScan", + "Operators": [ + { + "Name": "TableFullScan", + "ReadColumns": [ + "CounterID", + "URL" + ], + "ReadRanges": [ + "EventTime (-\u221e, +\u221e)", + "CounterID (-\u221e, +\u221e)", + "EventDate (-\u221e, +\u221e)", + "UserID (-\u221e, +\u221e)", + "WatchID (-\u221e, +\u221e)" + ], + 
"Table": "clickbench/plans/row/hits" + } + ], + "PlanNodeId": 9 + } + ] + } + ] + } + ] + } + ] + } + ] + } + ] + } + ] + }, "tables": [ { "name": "/local/clickbench/plans/row/hits", diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-28 b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-28 index a5b57cff85ab..986403f196c0 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-28 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-28 @@ -125,6 +125,102 @@ } ] }, + "SimplifiedPlan": { + "Node Type": "Query", + "PlanNodeId": 0, + "PlanNodeType": "Query", + "Plans": [ + { + "Node Type": "ResultSet", + "PlanNodeId": 1, + "PlanNodeType": "ResultSet", + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "25", + "Name": "Limit" + } + ], + "PlanNodeId": 2, + "Plans": [ + { + "Node Type": "TopSort", + "Operators": [ + { + "Limit": "25", + "Name": "TopSort", + "TopSortBy": "$25.l" + } + ], + "PlanNodeId": 4, + "Plans": [ + { + "Node Type": "Filter", + "Operators": [ + { + "Name": "Filter", + "Predicate": "item.Count0 > 100000" + } + ], + "PlanNodeId": 5, + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Aggregation": "{_yql_agg_1: SUM(state._yql_agg_1,1),_yql_agg_2: MIN(item.Referer,state._yql_agg_2)}", + "GroupBy": "item.key", + "Name": "Aggregate" + } + ], + "PlanNodeId": 7, + "Plans": [ + { + "Node Type": "Filter", + "Operators": [ + { + "Name": "Filter", + "Predicate": "item.Referer != \"\"" + } + ], + "PlanNodeId": 8, + "Plans": [ + { + "Node Type": "TableFullScan", + "Operators": [ + { + "Name": "TableFullScan", + "ReadColumns": [ + "Referer" + ], + "ReadRanges": [ + "EventTime (-\u221e, +\u221e)", + "CounterID (-\u221e, +\u221e)", + "EventDate (-\u221e, +\u221e)", + "UserID (-\u221e, +\u221e)", + "WatchID (-\u221e, +\u221e)" + ], + "Table": 
"clickbench/plans/row/hits" + } + ], + "PlanNodeId": 9 + } + ] + } + ] + } + ] + } + ] + } + ] + } + ] + } + ] + }, "tables": [ { "name": "/local/clickbench/plans/row/hits", diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-29 b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-29 index ed9111fb15b2..662b7d72845f 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-29 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-29 @@ -16,12 +16,21 @@ "Plans": [ { "CTE Name": "precompute_0_0", - "Node Type": "ConstantExpr", + "Node Type": "ConstantExpr-ConstantExpr", "Operators": [ { - "Inputs": [], + "Inputs": [ + { + "InternalOperatorId": 1 + } + ], "Iterator": "precompute_0_0", "Name": "Iterator" + }, + { + "Inputs": [], + "Member": "precompute_0_0", + "Name": "Member" } ], "PlanNodeId": 6 @@ -103,6 +112,128 @@ } ] }, + "SimplifiedPlan": { + "Node Type": "Query", + "PlanNodeId": 0, + "PlanNodeType": "Query", + "Plans": [ + { + "Node Type": "ResultSet_1", + "PlanNodeId": 1, + "PlanNodeType": "ResultSet", + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Name": "Aggregate" + } + ], + "PlanNodeId": 5, + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "1", + "Name": "Limit" + } + ], + "PlanNodeId": 6, + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Name": "Aggregate" + } + ], + "PlanNodeId": 8, + "Plans": [ + { + "Node Type": "TableFullScan", + "Operators": [ + { + "Name": "TableFullScan", + "ReadColumns": [ + "ResolutionWidth" + ], + "ReadRanges": [ + "EventTime (-\u221e, +\u221e)", + "CounterID (-\u221e, +\u221e)", + "EventDate (-\u221e, +\u221e)", + "UserID (-\u221e, +\u221e)", + "WatchID (-\u221e, +\u221e)" + ], + "Table": "clickbench/plans/row/hits" + } + ], + "PlanNodeId": 9 + } + ] + } + ] + } + ] + }, + { + "Node Type": 
"Aggregate", + "Operators": [ + { + "Name": "Aggregate" + } + ], + "PlanNodeId": 11, + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "1", + "Name": "Limit" + } + ], + "PlanNodeId": 12, + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Name": "Aggregate" + } + ], + "PlanNodeId": 14, + "Plans": [ + { + "Node Type": "TableFullScan", + "Operators": [ + { + "Name": "TableFullScan", + "ReadColumns": [ + "ResolutionWidth" + ], + "ReadRanges": [ + "EventTime (-\u221e, +\u221e)", + "CounterID (-\u221e, +\u221e)", + "EventDate (-\u221e, +\u221e)", + "UserID (-\u221e, +\u221e)", + "WatchID (-\u221e, +\u221e)" + ], + "Table": "clickbench/plans/row/hits" + } + ], + "PlanNodeId": 15 + } + ] + } + ] + } + ] + } + ] + } + ] + }, "tables": [ { "name": "/local/clickbench/plans/row/hits", diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-3 b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-3 index 2b220a05fff0..7893731cc961 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-3 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-3 @@ -103,6 +103,74 @@ } ] }, + "SimplifiedPlan": { + "Node Type": "Query", + "PlanNodeId": 0, + "PlanNodeType": "Query", + "Plans": [ + { + "Node Type": "ResultSet_1", + "PlanNodeId": 1, + "PlanNodeType": "ResultSet", + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Name": "Aggregate" + } + ], + "PlanNodeId": 4, + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "1", + "Name": "Limit" + } + ], + "PlanNodeId": 5, + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Name": "Aggregate" + } + ], + "PlanNodeId": 7, + "Plans": [ + { + "Node Type": "TableFullScan", + "Operators": [ + { + "Name": "TableFullScan", + "ReadColumns": [ + "UserID" + ], + "ReadRanges": [ + "EventTime 
(-\u221e, +\u221e)", + "CounterID (-\u221e, +\u221e)", + "EventDate (-\u221e, +\u221e)", + "UserID (-\u221e, +\u221e)", + "WatchID (-\u221e, +\u221e)" + ], + "Table": "clickbench/plans/row/hits" + } + ], + "PlanNodeId": 8 + } + ] + } + ] + } + ] + } + ] + } + ] + }, "tables": [ { "name": "/local/clickbench/plans/row/hits", diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-30 b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-30 index 24f212f66ece..a4d5b1bc5d55 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-30 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-30 @@ -121,6 +121,94 @@ } ] }, + "SimplifiedPlan": { + "Node Type": "Query", + "PlanNodeId": 0, + "PlanNodeType": "Query", + "Plans": [ + { + "Node Type": "ResultSet", + "PlanNodeId": 1, + "PlanNodeType": "ResultSet", + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "10", + "Name": "Limit" + } + ], + "PlanNodeId": 2, + "Plans": [ + { + "Node Type": "TopSort", + "Operators": [ + { + "Limit": "10", + "Name": "TopSort", + "TopSortBy": "argument.Count0" + } + ], + "PlanNodeId": 4, + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Aggregation": "{_yql_agg_1: SUM(state._yql_agg_1,1),_yql_agg_2: SUM(item.IsRefresh,state._yql_agg_2)}", + "GroupBy": "", + "Name": "Aggregate" + } + ], + "PlanNodeId": 6, + "Plans": [ + { + "Node Type": "Filter", + "Operators": [ + { + "Name": "Filter", + "Predicate": "item.SearchPhrase != \"\"" + } + ], + "PlanNodeId": 7, + "Plans": [ + { + "Node Type": "TableFullScan", + "Operators": [ + { + "Name": "TableFullScan", + "ReadColumns": [ + "ClientIP", + "IsRefresh", + "ResolutionWidth", + "SearchEngineID", + "SearchPhrase" + ], + "ReadRanges": [ + "EventTime (-\u221e, +\u221e)", + "CounterID (-\u221e, +\u221e)", + "EventDate (-\u221e, +\u221e)", + "UserID 
(-\u221e, +\u221e)", + "WatchID (-\u221e, +\u221e)" + ], + "Table": "clickbench/plans/row/hits" + } + ], + "PlanNodeId": 8 + } + ] + } + ] + } + ] + } + ] + } + ] + } + ] + }, "tables": [ { "name": "/local/clickbench/plans/row/hits", diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-31 b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-31 index c46804a19f9c..61bbb6c66a22 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-31 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-31 @@ -121,6 +121,94 @@ } ] }, + "SimplifiedPlan": { + "Node Type": "Query", + "PlanNodeId": 0, + "PlanNodeType": "Query", + "Plans": [ + { + "Node Type": "ResultSet", + "PlanNodeId": 1, + "PlanNodeType": "ResultSet", + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "10", + "Name": "Limit" + } + ], + "PlanNodeId": 2, + "Plans": [ + { + "Node Type": "TopSort", + "Operators": [ + { + "Limit": "10", + "Name": "TopSort", + "TopSortBy": "argument.Count0" + } + ], + "PlanNodeId": 4, + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Aggregation": "{_yql_agg_1: SUM(state._yql_agg_1,1),_yql_agg_2: SUM(item.IsRefresh,state._yql_agg_2)}", + "GroupBy": "", + "Name": "Aggregate" + } + ], + "PlanNodeId": 6, + "Plans": [ + { + "Node Type": "Filter", + "Operators": [ + { + "Name": "Filter", + "Predicate": "item.SearchPhrase != \"\"" + } + ], + "PlanNodeId": 7, + "Plans": [ + { + "Node Type": "TableFullScan", + "Operators": [ + { + "Name": "TableFullScan", + "ReadColumns": [ + "ClientIP", + "IsRefresh", + "ResolutionWidth", + "SearchPhrase", + "WatchID" + ], + "ReadRanges": [ + "EventTime (-\u221e, +\u221e)", + "CounterID (-\u221e, +\u221e)", + "EventDate (-\u221e, +\u221e)", + "UserID (-\u221e, +\u221e)", + "WatchID (-\u221e, +\u221e)" + ], + "Table": "clickbench/plans/row/hits" + } + 
], + "PlanNodeId": 8 + } + ] + } + ] + } + ] + } + ] + } + ] + } + ] + }, "tables": [ { "name": "/local/clickbench/plans/row/hits", diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-32 b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-32 index 09b4cb2acd5a..3caa0ba968b1 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-32 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-32 @@ -111,6 +111,81 @@ } ] }, + "SimplifiedPlan": { + "Node Type": "Query", + "PlanNodeId": 0, + "PlanNodeType": "Query", + "Plans": [ + { + "Node Type": "ResultSet", + "PlanNodeId": 1, + "PlanNodeType": "ResultSet", + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "10", + "Name": "Limit" + } + ], + "PlanNodeId": 2, + "Plans": [ + { + "Node Type": "TopSort", + "Operators": [ + { + "Limit": "10", + "Name": "TopSort", + "TopSortBy": "argument.Count0" + } + ], + "PlanNodeId": 4, + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Aggregation": "{_yql_agg_1: SUM(state._yql_agg_1,1),_yql_agg_2: SUM(item.IsRefresh,state._yql_agg_2)}", + "GroupBy": "", + "Name": "Aggregate" + } + ], + "PlanNodeId": 6, + "Plans": [ + { + "Node Type": "TableFullScan", + "Operators": [ + { + "Name": "TableFullScan", + "ReadColumns": [ + "ClientIP", + "IsRefresh", + "ResolutionWidth", + "WatchID" + ], + "ReadRanges": [ + "EventTime (-\u221e, +\u221e)", + "CounterID (-\u221e, +\u221e)", + "EventDate (-\u221e, +\u221e)", + "UserID (-\u221e, +\u221e)", + "WatchID (-\u221e, +\u221e)" + ], + "Table": "clickbench/plans/row/hits" + } + ], + "PlanNodeId": 7 + } + ] + } + ] + } + ] + } + ] + } + ] + }, "tables": [ { "name": "/local/clickbench/plans/row/hits", diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-33 
b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-33 index 7ab52ceefc56..6ae1db1d9ea6 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-33 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-33 @@ -107,6 +107,78 @@ } ] }, + "SimplifiedPlan": { + "Node Type": "Query", + "PlanNodeId": 0, + "PlanNodeType": "Query", + "Plans": [ + { + "Node Type": "ResultSet", + "PlanNodeId": 1, + "PlanNodeType": "ResultSet", + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "10", + "Name": "Limit" + } + ], + "PlanNodeId": 2, + "Plans": [ + { + "Node Type": "TopSort", + "Operators": [ + { + "Limit": "10", + "Name": "TopSort", + "TopSortBy": "argument.Count0" + } + ], + "PlanNodeId": 4, + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Aggregation": "{_yql_agg_0: SUM(state._yql_agg_0,1)}", + "GroupBy": "item.URL", + "Name": "Aggregate" + } + ], + "PlanNodeId": 6, + "Plans": [ + { + "Node Type": "TableFullScan", + "Operators": [ + { + "Name": "TableFullScan", + "ReadColumns": [ + "URL" + ], + "ReadRanges": [ + "EventTime (-\u221e, +\u221e)", + "CounterID (-\u221e, +\u221e)", + "EventDate (-\u221e, +\u221e)", + "UserID (-\u221e, +\u221e)", + "WatchID (-\u221e, +\u221e)" + ], + "Table": "clickbench/plans/row/hits" + } + ], + "PlanNodeId": 7 + } + ] + } + ] + } + ] + } + ] + } + ] + }, "tables": [ { "name": "/local/clickbench/plans/row/hits", diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-34 b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-34 index 838d9c40b5b9..87f0d196e666 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-34 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-34 @@ -109,6 +109,79 @@ } ] }, + "SimplifiedPlan": 
{ + "Node Type": "Query", + "PlanNodeId": 0, + "PlanNodeType": "Query", + "Plans": [ + { + "Node Type": "ResultSet", + "PlanNodeId": 1, + "PlanNodeType": "ResultSet", + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "10", + "Name": "Limit" + } + ], + "PlanNodeId": 2, + "Plans": [ + { + "Node Type": "TopSort", + "Operators": [ + { + "Limit": "10", + "Name": "TopSort", + "TopSortBy": "argument.Count0" + } + ], + "PlanNodeId": 4, + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Aggregation": "{_yql_agg_0: SUM(state._yql_agg_0,1)}", + "GroupBy": "", + "Name": "Aggregate" + } + ], + "PlanNodeId": 6, + "Plans": [ + { + "Node Type": "TableFullScan", + "Operators": [ + { + "Name": "TableFullScan", + "ReadColumns": [ + "URL", + "UserID" + ], + "ReadRanges": [ + "EventTime (-\u221e, +\u221e)", + "CounterID (-\u221e, +\u221e)", + "EventDate (-\u221e, +\u221e)", + "UserID (-\u221e, +\u221e)", + "WatchID (-\u221e, +\u221e)" + ], + "Table": "clickbench/plans/row/hits" + } + ], + "PlanNodeId": 7 + } + ] + } + ] + } + ] + } + ] + } + ] + }, "tables": [ { "name": "/local/clickbench/plans/row/hits", diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-35 b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-35 index fbdf53de28d3..a3679758bf76 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-35 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-35 @@ -110,6 +110,78 @@ } ] }, + "SimplifiedPlan": { + "Node Type": "Query", + "PlanNodeId": 0, + "PlanNodeType": "Query", + "Plans": [ + { + "Node Type": "ResultSet", + "PlanNodeId": 1, + "PlanNodeType": "ResultSet", + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "10", + "Name": "Limit" + } + ], + "PlanNodeId": 2, + "Plans": [ + { + "Node Type": "TopSort", + "Operators": [ + { + "Limit": "10", 
+ "Name": "TopSort", + "TopSortBy": "argument.Count0" + } + ], + "PlanNodeId": 4, + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Aggregation": "{_yql_agg_0: SUM(state._yql_agg_0,1)}", + "GroupBy": "", + "Name": "Aggregate" + } + ], + "PlanNodeId": 6, + "Plans": [ + { + "Node Type": "TableFullScan", + "Operators": [ + { + "Name": "TableFullScan", + "ReadColumns": [ + "ClientIP" + ], + "ReadRanges": [ + "EventTime (-\u221e, +\u221e)", + "CounterID (-\u221e, +\u221e)", + "EventDate (-\u221e, +\u221e)", + "UserID (-\u221e, +\u221e)", + "WatchID (-\u221e, +\u221e)" + ], + "Table": "clickbench/plans/row/hits" + } + ], + "PlanNodeId": 7 + } + ] + } + ] + } + ] + } + ] + } + ] + }, "tables": [ { "name": "/local/clickbench/plans/row/hits", diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-36 b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-36 index 106a07ebc1f7..45b864ae1fc4 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-36 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-36 @@ -120,6 +120,94 @@ } ] }, + "SimplifiedPlan": { + "Node Type": "Query", + "PlanNodeId": 0, + "PlanNodeType": "Query", + "Plans": [ + { + "Node Type": "ResultSet", + "PlanNodeId": 1, + "PlanNodeType": "ResultSet", + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "10", + "Name": "Limit" + } + ], + "PlanNodeId": 2, + "Plans": [ + { + "Node Type": "TopSort", + "Operators": [ + { + "Limit": "10", + "Name": "TopSort", + "TopSortBy": "argument.Count0" + } + ], + "PlanNodeId": 4, + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Aggregation": "{_yql_agg_0: SUM(state._yql_agg_0,1)}", + "GroupBy": "item.URL", + "Name": "Aggregate" + } + ], + "PlanNodeId": 6, + "Plans": [ + { + "Node Type": "Filter", + "Operators": [ + { + "Name": "Filter", + "Predicate": 
"item.CounterID == 62 And item.EventDate >= \"15887\" And item.EventDate <= \"15917\" And item.DontCountHits == 0 And item.IsRefresh == 0 And item.URL != \"\"" + } + ], + "PlanNodeId": 7, + "Plans": [ + { + "Node Type": "TableFullScan", + "Operators": [ + { + "Name": "TableFullScan", + "ReadColumns": [ + "CounterID", + "DontCountHits", + "EventDate", + "IsRefresh", + "URL" + ], + "ReadRanges": [ + "EventTime (-\u221e, +\u221e)", + "CounterID (-\u221e, +\u221e)", + "EventDate (-\u221e, +\u221e)", + "UserID (-\u221e, +\u221e)", + "WatchID (-\u221e, +\u221e)" + ], + "Table": "clickbench/plans/row/hits" + } + ], + "PlanNodeId": 8 + } + ] + } + ] + } + ] + } + ] + } + ] + } + ] + }, "tables": [ { "name": "/local/clickbench/plans/row/hits", diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-37 b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-37 index 6f9adaf10cac..d84d2e5e67bf 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-37 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-37 @@ -120,6 +120,94 @@ } ] }, + "SimplifiedPlan": { + "Node Type": "Query", + "PlanNodeId": 0, + "PlanNodeType": "Query", + "Plans": [ + { + "Node Type": "ResultSet", + "PlanNodeId": 1, + "PlanNodeType": "ResultSet", + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "10", + "Name": "Limit" + } + ], + "PlanNodeId": 2, + "Plans": [ + { + "Node Type": "TopSort", + "Operators": [ + { + "Limit": "10", + "Name": "TopSort", + "TopSortBy": "argument.Count0" + } + ], + "PlanNodeId": 4, + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Aggregation": "{_yql_agg_0: SUM(state._yql_agg_0,1)}", + "GroupBy": "item.Title", + "Name": "Aggregate" + } + ], + "PlanNodeId": 6, + "Plans": [ + { + "Node Type": "Filter", + "Operators": [ + { + "Name": "Filter", + "Predicate": "item.CounterID 
== 62 And item.EventDate >= \"15887\" And item.EventDate <= \"15917\" And item.DontCountHits == 0 And item.IsRefresh == 0 And item.Title != \"\"" + } + ], + "PlanNodeId": 7, + "Plans": [ + { + "Node Type": "TableFullScan", + "Operators": [ + { + "Name": "TableFullScan", + "ReadColumns": [ + "CounterID", + "DontCountHits", + "EventDate", + "IsRefresh", + "Title" + ], + "ReadRanges": [ + "EventTime (-\u221e, +\u221e)", + "CounterID (-\u221e, +\u221e)", + "EventDate (-\u221e, +\u221e)", + "UserID (-\u221e, +\u221e)", + "WatchID (-\u221e, +\u221e)" + ], + "Table": "clickbench/plans/row/hits" + } + ], + "PlanNodeId": 8 + } + ] + } + ] + } + ] + } + ] + } + ] + } + ] + }, "tables": [ { "name": "/local/clickbench/plans/row/hits", diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-38 b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-38 index 3900c0a0d97a..0ba3a87d8cd2 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-38 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-38 @@ -154,6 +154,119 @@ } ] }, + "SimplifiedPlan": { + "Node Type": "Query", + "PlanNodeId": 0, + "PlanNodeType": "Query", + "Plans": [ + { + "Node Type": "ResultSet", + "PlanNodeId": 1, + "PlanNodeType": "ResultSet", + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "10", + "Name": "Limit" + } + ], + "PlanNodeId": 2, + "Plans": [ + { + "Node Type": "Offset", + "Operators": [ + { + "Name": "Offset", + "Offset": "1000" + } + ], + "PlanNodeId": 3, + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "SUM(10,1000)", + "Name": "Limit" + } + ], + "PlanNodeId": 5, + "Plans": [ + { + "Node Type": "TopSort", + "Operators": [ + { + "Limit": "SUM(10,1000)", + "Name": "TopSort", + "TopSortBy": "argument.Count0" + } + ], + "PlanNodeId": 7, + "Plans": [ + { + "Node Type": "Aggregate", + 
"Operators": [ + { + "Aggregation": "{_yql_agg_0: SUM(state._yql_agg_0,1)}", + "GroupBy": "item.URL", + "Name": "Aggregate" + } + ], + "PlanNodeId": 9, + "Plans": [ + { + "Node Type": "Filter", + "Operators": [ + { + "Name": "Filter", + "Predicate": "item.CounterID == 62 And item.EventDate >= \"15887\" And item.EventDate <= \"15917\" And item.IsRefresh == 0 And item.IsLink != 0 And item.IsDownload == 0" + } + ], + "PlanNodeId": 10, + "Plans": [ + { + "Node Type": "TableFullScan", + "Operators": [ + { + "Name": "TableFullScan", + "ReadColumns": [ + "CounterID", + "EventDate", + "IsDownload", + "IsLink", + "IsRefresh", + "URL" + ], + "ReadRanges": [ + "EventTime (-\u221e, +\u221e)", + "CounterID (-\u221e, +\u221e)", + "EventDate (-\u221e, +\u221e)", + "UserID (-\u221e, +\u221e)", + "WatchID (-\u221e, +\u221e)" + ], + "Table": "clickbench/plans/row/hits" + } + ], + "PlanNodeId": 11 + } + ] + } + ] + } + ] + } + ] + } + ] + } + ] + } + ] + } + ] + }, "tables": [ { "name": "/local/clickbench/plans/row/hits", diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-39 b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-39 index 6fc4de586644..2cc5681d35ae 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-39 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-39 @@ -160,6 +160,121 @@ } ] }, + "SimplifiedPlan": { + "Node Type": "Query", + "PlanNodeId": 0, + "PlanNodeType": "Query", + "Plans": [ + { + "Node Type": "ResultSet", + "PlanNodeId": 1, + "PlanNodeType": "ResultSet", + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "10", + "Name": "Limit" + } + ], + "PlanNodeId": 2, + "Plans": [ + { + "Node Type": "Offset", + "Operators": [ + { + "Name": "Offset", + "Offset": "1000" + } + ], + "PlanNodeId": 3, + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": 
"SUM(10,1000)", + "Name": "Limit" + } + ], + "PlanNodeId": 5, + "Plans": [ + { + "Node Type": "TopSort", + "Operators": [ + { + "Limit": "SUM(10,1000)", + "Name": "TopSort", + "TopSortBy": "argument.Count0" + } + ], + "PlanNodeId": 7, + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Aggregation": "{_yql_agg_0: SUM(state._yql_agg_0,1)}", + "GroupBy": "", + "Name": "Aggregate" + } + ], + "PlanNodeId": 9, + "Plans": [ + { + "Node Type": "Filter", + "Operators": [ + { + "Name": "Filter", + "Predicate": "item.CounterID == 62 And item.EventDate >= \"15887\" And item.EventDate <= \"15917\" And item.IsRefresh == 0" + } + ], + "PlanNodeId": 10, + "Plans": [ + { + "Node Type": "TableFullScan", + "Operators": [ + { + "Name": "TableFullScan", + "ReadColumns": [ + "AdvEngineID", + "CounterID", + "EventDate", + "IsRefresh", + "Referer", + "SearchEngineID", + "TraficSourceID", + "URL" + ], + "ReadRanges": [ + "EventTime (-\u221e, +\u221e)", + "CounterID (-\u221e, +\u221e)", + "EventDate (-\u221e, +\u221e)", + "UserID (-\u221e, +\u221e)", + "WatchID (-\u221e, +\u221e)" + ], + "Table": "clickbench/plans/row/hits" + } + ], + "PlanNodeId": 11 + } + ] + } + ] + } + ] + } + ] + } + ] + } + ] + } + ] + } + ] + }, "tables": [ { "name": "/local/clickbench/plans/row/hits", diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-4 b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-4 index f0370399cac5..77ee960840a5 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-4 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-4 @@ -131,6 +131,87 @@ } ] }, + "SimplifiedPlan": { + "Node Type": "Query", + "PlanNodeId": 0, + "PlanNodeType": "Query", + "Plans": [ + { + "Node Type": "ResultSet_1", + "PlanNodeId": 1, + "PlanNodeType": "ResultSet", + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ 
+ { + "Name": "Aggregate" + } + ], + "PlanNodeId": 4, + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "1", + "Name": "Limit" + } + ], + "PlanNodeId": 5, + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Name": "Aggregate" + } + ], + "PlanNodeId": 7, + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Aggregation": "state", + "GroupBy": "row.UserID", + "Name": "Aggregate" + } + ], + "PlanNodeId": 9, + "Plans": [ + { + "Node Type": "TableFullScan", + "Operators": [ + { + "Name": "TableFullScan", + "ReadColumns": [ + "UserID" + ], + "ReadRanges": [ + "EventTime (-\u221e, +\u221e)", + "CounterID (-\u221e, +\u221e)", + "EventDate (-\u221e, +\u221e)", + "UserID (-\u221e, +\u221e)", + "WatchID (-\u221e, +\u221e)" + ], + "Table": "clickbench/plans/row/hits" + } + ], + "PlanNodeId": 10 + } + ] + } + ] + } + ] + } + ] + } + ] + } + ] + }, "tables": [ { "name": "/local/clickbench/plans/row/hits", diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-40 b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-40 index 9b659bde678e..56e895833626 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-40 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-40 @@ -155,6 +155,119 @@ } ] }, + "SimplifiedPlan": { + "Node Type": "Query", + "PlanNodeId": 0, + "PlanNodeType": "Query", + "Plans": [ + { + "Node Type": "ResultSet", + "PlanNodeId": 1, + "PlanNodeType": "ResultSet", + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "10", + "Name": "Limit" + } + ], + "PlanNodeId": 2, + "Plans": [ + { + "Node Type": "Offset", + "Operators": [ + { + "Name": "Offset", + "Offset": "100" + } + ], + "PlanNodeId": 3, + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "SUM(10,100)", + "Name": "Limit" + } + ], + "PlanNodeId": 5, 
+ "Plans": [ + { + "Node Type": "TopSort", + "Operators": [ + { + "Limit": "SUM(10,100)", + "Name": "TopSort", + "TopSortBy": "argument.Count0" + } + ], + "PlanNodeId": 7, + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Aggregation": "{_yql_agg_0: SUM(state._yql_agg_0,1)}", + "GroupBy": "", + "Name": "Aggregate" + } + ], + "PlanNodeId": 9, + "Plans": [ + { + "Node Type": "Filter", + "Operators": [ + { + "Name": "Filter", + "Predicate": "item.CounterID == 62 And item.EventDate >= \"15887\" And item.EventDate <= \"15917\" And item.IsRefresh == 0 And If And item.RefererHash == 3594120000172545465" + } + ], + "PlanNodeId": 10, + "Plans": [ + { + "Node Type": "TableFullScan", + "Operators": [ + { + "Name": "TableFullScan", + "ReadColumns": [ + "CounterID", + "EventDate", + "IsRefresh", + "RefererHash", + "TraficSourceID", + "URLHash" + ], + "ReadRanges": [ + "EventTime (-\u221e, +\u221e)", + "CounterID (-\u221e, +\u221e)", + "EventDate (-\u221e, +\u221e)", + "UserID (-\u221e, +\u221e)", + "WatchID (-\u221e, +\u221e)" + ], + "Table": "clickbench/plans/row/hits" + } + ], + "PlanNodeId": 11 + } + ] + } + ] + } + ] + } + ] + } + ] + } + ] + } + ] + } + ] + }, "tables": [ { "name": "/local/clickbench/plans/row/hits", diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-41 b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-41 index 72c49bce7aa4..2f73b73dbf42 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-41 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-41 @@ -156,6 +156,120 @@ } ] }, + "SimplifiedPlan": { + "Node Type": "Query", + "PlanNodeId": 0, + "PlanNodeType": "Query", + "Plans": [ + { + "Node Type": "ResultSet", + "PlanNodeId": 1, + "PlanNodeType": "ResultSet", + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "10", + "Name": "Limit" + } 
+ ], + "PlanNodeId": 2, + "Plans": [ + { + "Node Type": "Offset", + "Operators": [ + { + "Name": "Offset", + "Offset": "10000" + } + ], + "PlanNodeId": 3, + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "SUM(10,10000)", + "Name": "Limit" + } + ], + "PlanNodeId": 5, + "Plans": [ + { + "Node Type": "TopSort", + "Operators": [ + { + "Limit": "SUM(10,10000)", + "Name": "TopSort", + "TopSortBy": "argument.Count0" + } + ], + "PlanNodeId": 7, + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Aggregation": "{_yql_agg_0: SUM(state._yql_agg_0,1)}", + "GroupBy": "", + "Name": "Aggregate" + } + ], + "PlanNodeId": 9, + "Plans": [ + { + "Node Type": "Filter", + "Operators": [ + { + "Name": "Filter", + "Predicate": "item.CounterID == 62 And item.EventDate >= \"15887\" And item.EventDate <= \"15917\" And item.IsRefresh == 0 And item.DontCountHits == 0 And item.URLHash == 2868770270353813622" + } + ], + "PlanNodeId": 10, + "Plans": [ + { + "Node Type": "TableFullScan", + "Operators": [ + { + "Name": "TableFullScan", + "ReadColumns": [ + "CounterID", + "DontCountHits", + "EventDate", + "IsRefresh", + "URLHash", + "WindowClientHeight", + "WindowClientWidth" + ], + "ReadRanges": [ + "EventTime (-\u221e, +\u221e)", + "CounterID (-\u221e, +\u221e)", + "EventDate (-\u221e, +\u221e)", + "UserID (-\u221e, +\u221e)", + "WatchID (-\u221e, +\u221e)" + ], + "Table": "clickbench/plans/row/hits" + } + ], + "PlanNodeId": 11 + } + ] + } + ] + } + ] + } + ] + } + ] + } + ] + } + ] + } + ] + }, "tables": [ { "name": "/local/clickbench/plans/row/hits", diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-42 b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-42 index 1acf534e32a3..f0ccfe4133e7 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-42 +++ 
b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-42 @@ -153,6 +153,118 @@ } ] }, + "SimplifiedPlan": { + "Node Type": "Query", + "PlanNodeId": 0, + "PlanNodeType": "Query", + "Plans": [ + { + "Node Type": "ResultSet", + "PlanNodeId": 1, + "PlanNodeType": "ResultSet", + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "10", + "Name": "Limit" + } + ], + "PlanNodeId": 2, + "Plans": [ + { + "Node Type": "Offset", + "Operators": [ + { + "Name": "Offset", + "Offset": "1000" + } + ], + "PlanNodeId": 3, + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "SUM(10,1000)", + "Name": "Limit" + } + ], + "PlanNodeId": 5, + "Plans": [ + { + "Node Type": "TopSort", + "Operators": [ + { + "Limit": "SUM(10,1000)", + "Name": "TopSort", + "TopSortBy": "argument.Minute" + } + ], + "PlanNodeId": 7, + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Aggregation": "{_yql_agg_0: SUM(state._yql_agg_0,1)}", + "GroupBy": "item.Minute", + "Name": "Aggregate" + } + ], + "PlanNodeId": 9, + "Plans": [ + { + "Node Type": "Filter", + "Operators": [ + { + "Name": "Filter", + "Predicate": "item.CounterID == 62 And item.EventDate >= \"15900\" And item.EventDate <= \"15901\" And item.IsRefresh == 0 And item.DontCountHits == 0" + } + ], + "PlanNodeId": 10, + "Plans": [ + { + "Node Type": "TableFullScan", + "Operators": [ + { + "Name": "TableFullScan", + "ReadColumns": [ + "CounterID", + "DontCountHits", + "EventDate", + "EventTime", + "IsRefresh" + ], + "ReadRanges": [ + "EventTime (-\u221e, +\u221e)", + "CounterID (-\u221e, +\u221e)", + "EventDate (-\u221e, +\u221e)", + "UserID (-\u221e, +\u221e)", + "WatchID (-\u221e, +\u221e)" + ], + "Table": "clickbench/plans/row/hits" + } + ], + "PlanNodeId": 11 + } + ] + } + ] + } + ] + } + ] + } + ] + } + ] + } + ] + } + ] + }, "tables": [ { "name": "/local/clickbench/plans/row/hits", diff --git 
a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-5 b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-5 index 6af51b141aaa..ce6fcf89cc4d 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-5 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-5 @@ -131,6 +131,87 @@ } ] }, + "SimplifiedPlan": { + "Node Type": "Query", + "PlanNodeId": 0, + "PlanNodeType": "Query", + "Plans": [ + { + "Node Type": "ResultSet_1", + "PlanNodeId": 1, + "PlanNodeType": "ResultSet", + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Name": "Aggregate" + } + ], + "PlanNodeId": 4, + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "1", + "Name": "Limit" + } + ], + "PlanNodeId": 5, + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Name": "Aggregate" + } + ], + "PlanNodeId": 7, + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Aggregation": "state", + "GroupBy": "row.SearchPhrase", + "Name": "Aggregate" + } + ], + "PlanNodeId": 9, + "Plans": [ + { + "Node Type": "TableFullScan", + "Operators": [ + { + "Name": "TableFullScan", + "ReadColumns": [ + "SearchPhrase" + ], + "ReadRanges": [ + "EventTime (-\u221e, +\u221e)", + "CounterID (-\u221e, +\u221e)", + "EventDate (-\u221e, +\u221e)", + "UserID (-\u221e, +\u221e)", + "WatchID (-\u221e, +\u221e)" + ], + "Table": "clickbench/plans/row/hits" + } + ], + "PlanNodeId": 10 + } + ] + } + ] + } + ] + } + ] + } + ] + } + ] + }, "tables": [ { "name": "/local/clickbench/plans/row/hits", diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-6 b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-6 index 4d5e196100bb..64b25c06f135 100644 --- 
a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-6 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-6 @@ -16,12 +16,21 @@ "Plans": [ { "CTE Name": "precompute_0_0", - "Node Type": "ConstantExpr", + "Node Type": "ConstantExpr-ConstantExpr", "Operators": [ { - "Inputs": [], + "Inputs": [ + { + "InternalOperatorId": 1 + } + ], "Iterator": "precompute_0_0", "Name": "Iterator" + }, + { + "Inputs": [], + "Member": "precompute_0_0", + "Name": "Member" } ], "PlanNodeId": 6 @@ -103,6 +112,128 @@ } ] }, + "SimplifiedPlan": { + "Node Type": "Query", + "PlanNodeId": 0, + "PlanNodeType": "Query", + "Plans": [ + { + "Node Type": "ResultSet_1", + "PlanNodeId": 1, + "PlanNodeType": "ResultSet", + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Name": "Aggregate" + } + ], + "PlanNodeId": 5, + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "1", + "Name": "Limit" + } + ], + "PlanNodeId": 6, + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Name": "Aggregate" + } + ], + "PlanNodeId": 8, + "Plans": [ + { + "Node Type": "TableFullScan", + "Operators": [ + { + "Name": "TableFullScan", + "ReadColumns": [ + "EventDate" + ], + "ReadRanges": [ + "EventTime (-\u221e, +\u221e)", + "CounterID (-\u221e, +\u221e)", + "EventDate (-\u221e, +\u221e)", + "UserID (-\u221e, +\u221e)", + "WatchID (-\u221e, +\u221e)" + ], + "Table": "clickbench/plans/row/hits" + } + ], + "PlanNodeId": 9 + } + ] + } + ] + } + ] + }, + { + "Node Type": "Aggregate", + "Operators": [ + { + "Name": "Aggregate" + } + ], + "PlanNodeId": 11, + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "1", + "Name": "Limit" + } + ], + "PlanNodeId": 12, + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Name": "Aggregate" + } + ], + "PlanNodeId": 14, + "Plans": [ + { + "Node Type": "TableFullScan", + "Operators": [ + { + "Name": "TableFullScan", + 
"ReadColumns": [ + "EventDate" + ], + "ReadRanges": [ + "EventTime (-\u221e, +\u221e)", + "CounterID (-\u221e, +\u221e)", + "EventDate (-\u221e, +\u221e)", + "UserID (-\u221e, +\u221e)", + "WatchID (-\u221e, +\u221e)" + ], + "Table": "clickbench/plans/row/hits" + } + ], + "PlanNodeId": 15 + } + ] + } + ] + } + ] + } + ] + } + ] + }, "tables": [ { "name": "/local/clickbench/plans/row/hits", diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-7 b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-7 index 0935f1e202e0..29b3dcc1e78e 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-7 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-7 @@ -116,6 +116,90 @@ } ] }, + "SimplifiedPlan": { + "Node Type": "Query", + "PlanNodeId": 0, + "PlanNodeType": "Query", + "Plans": [ + { + "Node Type": "ResultSet", + "PlanNodeId": 1, + "PlanNodeType": "ResultSet", + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "1001", + "Name": "Limit" + } + ], + "PlanNodeId": 2, + "Plans": [ + { + "Node Type": "TopSort", + "Operators": [ + { + "Limit": "1001", + "Name": "TopSort", + "TopSortBy": "argument.Count0" + } + ], + "PlanNodeId": 4, + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Aggregation": "{_yql_agg_0: SUM(state._yql_agg_0,1)}", + "GroupBy": "item.AdvEngineID", + "Name": "Aggregate" + } + ], + "PlanNodeId": 6, + "Plans": [ + { + "Node Type": "Filter", + "Operators": [ + { + "Name": "Filter", + "Predicate": "item.AdvEngineID != 0" + } + ], + "PlanNodeId": 7, + "Plans": [ + { + "Node Type": "TableFullScan", + "Operators": [ + { + "Name": "TableFullScan", + "ReadColumns": [ + "AdvEngineID" + ], + "ReadRanges": [ + "EventTime (-\u221e, +\u221e)", + "CounterID (-\u221e, +\u221e)", + "EventDate (-\u221e, +\u221e)", + "UserID (-\u221e, +\u221e)", + "WatchID (-\u221e, 
+\u221e)" + ], + "Table": "clickbench/plans/row/hits" + } + ], + "PlanNodeId": 8 + } + ] + } + ] + } + ] + } + ] + } + ] + } + ] + }, "tables": [ { "name": "/local/clickbench/plans/row/hits", diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-8 b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-8 index a0edc56657e5..3d2a2a9eda8b 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-8 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-8 @@ -137,6 +137,92 @@ } ] }, + "SimplifiedPlan": { + "Node Type": "Query", + "PlanNodeId": 0, + "PlanNodeType": "Query", + "Plans": [ + { + "Node Type": "ResultSet", + "PlanNodeId": 1, + "PlanNodeType": "ResultSet", + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "10", + "Name": "Limit" + } + ], + "PlanNodeId": 2, + "Plans": [ + { + "Node Type": "TopSort", + "Operators": [ + { + "Limit": "10", + "Name": "TopSort", + "TopSortBy": "argument.Count0" + } + ], + "PlanNodeId": 4, + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Aggregation": "{_yql_agg_0: COUNT(item.UserID,state._yql_agg_0)}", + "GroupBy": "item.RegionID", + "Name": "Aggregate" + } + ], + "PlanNodeId": 6, + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Aggregation": "state", + "GroupBy": "", + "Name": "Aggregate" + } + ], + "PlanNodeId": 8, + "Plans": [ + { + "Node Type": "TableFullScan", + "Operators": [ + { + "Name": "TableFullScan", + "ReadColumns": [ + "RegionID", + "UserID" + ], + "ReadRanges": [ + "EventTime (-\u221e, +\u221e)", + "CounterID (-\u221e, +\u221e)", + "EventDate (-\u221e, +\u221e)", + "UserID (-\u221e, +\u221e)", + "WatchID (-\u221e, +\u221e)" + ], + "Table": "clickbench/plans/row/hits" + } + ], + "PlanNodeId": 9 + } + ] + } + ] + } + ] + } + ] + } + ] + } + ] + }, "tables": [ { "name": 
"/local/clickbench/plans/row/hits", diff --git a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-9 b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-9 index 06a0806ba1f9..569fdcf4b140 100644 --- a/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-9 +++ b/ydb/tests/functional/clickbench/canondata/test.test_plans_row_/queries-original-plan-row-9 @@ -212,6 +212,141 @@ } ] }, + "SimplifiedPlan": { + "Node Type": "Query", + "PlanNodeId": 0, + "PlanNodeType": "Query", + "Plans": [ + { + "Node Type": "ResultSet", + "PlanNodeId": 1, + "PlanNodeType": "ResultSet", + "Plans": [ + { + "Node Type": "Limit", + "Operators": [ + { + "Limit": "10", + "Name": "Limit" + } + ], + "PlanNodeId": 2, + "Plans": [ + { + "Node Type": "TopSort", + "Operators": [ + { + "Limit": "10", + "Name": "TopSort", + "TopSortBy": "argument.Count0" + } + ], + "PlanNodeId": 4, + "Plans": [ + { + "Node Type": "Union", + "Operators": [ + { + "Name": "Union" + } + ], + "PlanNodeId": 6, + "Plans": [ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Aggregation": "{_yql_agg_1: SUM(state._yql_agg_1,1),_yql_agg_3: SUM(item.AdvEngineID,state._yql_agg_3)}", + "GroupBy": "item.RegionID", + "Name": "Aggregate" + } + ], + "PlanNodeId": 8, + "Plans": [ + { + "Node Type": "TableFullScan", + "Operators": [ + { + "Name": "TableFullScan", + "ReadColumns": [ + "AdvEngineID", + "RegionID", + "ResolutionWidth", + "UserID" + ], + "ReadRanges": [ + "EventTime (-\u221e, +\u221e)", + "CounterID (-\u221e, +\u221e)", + "EventDate (-\u221e, +\u221e)", + "UserID (-\u221e, +\u221e)", + "WatchID (-\u221e, +\u221e)" + ], + "Table": "clickbench/plans/row/hits" + } + ], + "PlanNodeId": 9 + } + ] + }, + { + "Node Type": "Aggregate", + "Operators": [ + { + "Aggregation": "{_yql_agg_2: COUNT(item.UserID,state._yql_agg_2)}", + "GroupBy": "item.RegionID", + "Name": "Aggregate" + } + ], + "PlanNodeId": 11, + "Plans": 
[ + { + "Node Type": "Aggregate", + "Operators": [ + { + "Aggregation": "state", + "GroupBy": "", + "Name": "Aggregate" + } + ], + "PlanNodeId": 13, + "Plans": [ + { + "Node Type": "TableFullScan", + "Operators": [ + { + "Name": "TableFullScan", + "ReadColumns": [ + "AdvEngineID", + "RegionID", + "ResolutionWidth", + "UserID" + ], + "ReadRanges": [ + "EventTime (-\u221e, +\u221e)", + "CounterID (-\u221e, +\u221e)", + "EventDate (-\u221e, +\u221e)", + "UserID (-\u221e, +\u221e)", + "WatchID (-\u221e, +\u221e)" + ], + "Table": "clickbench/plans/row/hits" + } + ], + "PlanNodeId": 14 + } + ] + } + ] + } + ] + } + ] + } + ] + } + ] + } + ] + }, "tables": [ { "name": "/local/clickbench/plans/row/hits",