Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 20 additions & 0 deletions be/src/exec/common/variant_util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -992,6 +992,10 @@ void VariantCompactionUtil::get_subpaths(int32_t max_subcolumns_count,
std::vector<std::pair<size_t, std::string_view>> paths_with_sizes;
paths_with_sizes.reserve(stats.size());
for (const auto& [path, size] : stats) {
if (path.empty()) {
paths_set_info.sparse_path_set.emplace(path);
continue;
}
paths_with_sizes.emplace_back(size, path);
}
std::sort(paths_with_sizes.begin(), paths_with_sizes.end(), std::greater());
Expand All @@ -1010,6 +1014,10 @@ void VariantCompactionUtil::get_subpaths(int32_t max_subcolumns_count,
} else {
// Apply all paths as subcolumns
for (const auto& [path, _] : stats) {
if (path.empty()) {
paths_set_info.sparse_path_set.emplace(path);
continue;
}
paths_set_info.sub_path_set.emplace(path);
}
}
Expand Down Expand Up @@ -1137,6 +1145,10 @@ Status VariantCompactionUtil::get_compaction_typed_columns(
return Status::OK();
}
for (const auto& path : typed_paths) {
if (path.empty()) {
paths_set_info.sparse_path_set.emplace(path);
continue;
}
TabletSchema::SubColumnInfo sub_column_info;
if (generate_sub_column_info(*target, parent_column->unique_id(), path, &sub_column_info)) {
inherit_column_attributes(*parent_column, sub_column_info.column);
Expand Down Expand Up @@ -1193,6 +1205,10 @@ void VariantCompactionUtil::get_compaction_subcolumns_from_subpaths(
const auto& parent_indexes = target->inverted_indexs(parent_column->unique_id());
// append subcolumns
for (const auto& subpath : sorted_subpaths) {
if (subpath.size == 0) {
paths_set_info.sparse_path_set.emplace("");
continue;
}
auto column_name = parent_column->name_lower_case() + "." + subpath.to_string();
auto column_path = PathInData(column_name);

Expand Down Expand Up @@ -1255,6 +1271,10 @@ void VariantCompactionUtil::get_compaction_subcolumns_from_data_types(
for (const auto& [path, data_types] : path_to_data_types) {
// Typed paths are materialized by get_compaction_typed_columns(); this helper only
// materializes regular subcolumns inferred from rowset data types.
if (path.get_path().empty()) {
paths_set_info.sparse_path_set.emplace(path.get_path());
continue;
}
if (data_types.empty() || path.empty() || path.get_is_typed() || path.has_nested_part()) {
continue;
}
Expand Down
37 changes: 37 additions & 0 deletions be/test/exec/common/schema_util_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -449,6 +449,35 @@ TEST_F(SchemaUtilTest, get_subpaths_equal_to_max) {
uid_to_paths_set_info[1].sub_path_set.end());
}

// Checks that VariantCompactionUtil::get_subpaths() always files the empty path ("")
// under sparse_path_set and never under sub_path_set, for three different values of
// the max-subcolumns argument (2, 4 and 0) applied to the same statistics.
TEST_F(SchemaUtilTest, get_subpaths_keeps_empty_path_sparse) {
    // Four entries; the empty path has the largest value (1000) of the four.
    variant_util::PathToNoneNullValues stats = {
            {"", 1000}, {"path1", 900}, {"path2", 800}, {"path3", 700}};

    // Limit 2: the two largest non-empty paths are expected as subcolumns, while
    // "path3" and the empty path are expected in the sparse set.
    {
        TabletSchema::PathsSetInfo info;
        variant_util::VariantCompactionUtil::get_subpaths(2, stats, info);

        EXPECT_TRUE(info.sub_path_set.contains("path1"));
        EXPECT_TRUE(info.sub_path_set.contains("path2"));
        EXPECT_FALSE(info.sub_path_set.contains(""));

        EXPECT_TRUE(info.sparse_path_set.contains("path3"));
        EXPECT_TRUE(info.sparse_path_set.contains(""));
    }

    // Limit 4, equal to the number of entries in `stats`: every non-empty path is
    // expected as a subcolumn, the empty path is still expected to be sparse.
    {
        TabletSchema::PathsSetInfo info;
        variant_util::VariantCompactionUtil::get_subpaths(4, stats, info);

        EXPECT_TRUE(info.sub_path_set.contains("path1"));
        EXPECT_TRUE(info.sub_path_set.contains("path2"));
        EXPECT_TRUE(info.sub_path_set.contains("path3"));
        EXPECT_FALSE(info.sub_path_set.contains(""));

        EXPECT_TRUE(info.sparse_path_set.contains(""));
    }

    // Limit 0: same expectations as the previous scenario.
    {
        TabletSchema::PathsSetInfo info;
        variant_util::VariantCompactionUtil::get_subpaths(0, stats, info);

        EXPECT_TRUE(info.sub_path_set.contains("path1"));
        EXPECT_TRUE(info.sub_path_set.contains("path2"));
        EXPECT_TRUE(info.sub_path_set.contains("path3"));
        EXPECT_FALSE(info.sub_path_set.contains(""));

        EXPECT_TRUE(info.sparse_path_set.contains(""));
    }
}

TEST_F(SchemaUtilTest, get_subpaths_multiple_variants) {
TabletSchema schema;
TabletColumn variant1;
Expand Down Expand Up @@ -1455,6 +1484,7 @@ TEST_F(SchemaUtilTest, get_compaction_typed_columns) {
schema->append_column(variant);

std::unordered_set<std::string> typed_paths;
typed_paths.insert("");
typed_paths.insert("profile.id.name");
TabletSchemaSPtr output_schema = std::make_shared<TabletSchema>();
TabletColumnPtr parent_column = std::make_shared<TabletColumn>(variant);
Expand All @@ -1465,6 +1495,8 @@ TEST_F(SchemaUtilTest, get_compaction_typed_columns) {
EXPECT_EQ(output_schema->num_columns(), 1);
EXPECT_EQ(output_schema->column(0).type(), FieldType::OLAP_FIELD_TYPE_INT);
EXPECT_EQ(paths_set_info.typed_path_set.size(), 1);
EXPECT_TRUE(paths_set_info.sparse_path_set.contains(""));
EXPECT_FALSE(paths_set_info.typed_path_set.contains(""));

typed_paths.insert("abc");
EXPECT_FALSE(variant_util::VariantCompactionUtil::get_compaction_typed_columns(
Expand Down Expand Up @@ -1532,6 +1564,7 @@ TEST_F(SchemaUtilTest, get_compaction_subcolumns_from_subpaths) {
TabletColumnPtr parent_column = std::make_shared<TabletColumn>(variant);

TabletSchema::PathsSetInfo paths_set_info;
paths_set_info.sub_path_set.insert("");
paths_set_info.sub_path_set.insert("a");
paths_set_info.sub_path_set.insert("b");
doris::variant_util::PathToDataTypes path_to_data_types;
Expand All @@ -1541,6 +1574,7 @@ TEST_F(SchemaUtilTest, get_compaction_subcolumns_from_subpaths) {
variant_util::VariantCompactionUtil::get_compaction_subcolumns_from_subpaths(
paths_set_info, parent_column, schema, path_to_data_types, sparse_paths, output_schema);
EXPECT_EQ(output_schema->num_columns(), 2);
EXPECT_TRUE(paths_set_info.sparse_path_set.contains(""));
for (const auto& column : output_schema->columns()) {
EXPECT_EQ(column->type(), FieldType::OLAP_FIELD_TYPE_VARIANT);
}
Expand Down Expand Up @@ -1704,6 +1738,7 @@ TEST_F(SchemaUtilTest, get_compaction_subcolumns_from_data_types) {
path_to_data_types[PathInData("b")] = {std::make_shared<DataTypeString>()}; // -> STRING
path_to_data_types[PathInData("typed", true)] = {std::make_shared<DataTypeString>()};
path_to_data_types[PathInData("shared")] = {std::make_shared<DataTypeInt32>()};
path_to_data_types[PathInData("")] = {std::make_shared<DataTypeString>()};

TabletSchemaSPtr output_schema = std::make_shared<TabletSchema>();
TabletSchema::PathsSetInfo paths_set_info;
Expand Down Expand Up @@ -1756,6 +1791,8 @@ TEST_F(SchemaUtilTest, get_compaction_subcolumns_from_data_types) {
EXPECT_TRUE(paths_set_info.sub_path_set.contains("b"));
EXPECT_TRUE(paths_set_info.sub_path_set.contains("shared"));
EXPECT_FALSE(paths_set_info.sub_path_set.contains("typed"));
EXPECT_TRUE(paths_set_info.sparse_path_set.contains(""));
EXPECT_FALSE(paths_set_info.sub_path_set.contains(""));
}

// Test has_different_structure_in_same_path function indirectly through check_variant_has_no_ambiguous_paths
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !empty_key_values --
\N
\N
\N
\N


1234566
16
8888888
UPPER CASE
dkdkdkdkdkd
ooaoaaaaaaa
xmxxmmmmmm
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

// Regression case for variant values that use the empty string "" as a JSON key.
// Flow: set session variables, create a one-bucket table with a variant column,
// insert rows one statement at a time (several of them keyed by ""), trigger a
// cumulative compaction, then read the values back through Tags[''].
suite("test_variant_empty_key_sparse_bucket", "nonConcurrent") {
// Session variables applied before the table is created.
// NOTE(review): presumably these pin the variant storage layout this case relies on
// (no doc mode, 3 sparse hash shards, max subcolumns count 0) — confirm against the
// definitions of these variables.
sql "SET default_variant_max_subcolumns_count = 0"
sql "SET default_variant_enable_doc_mode = false"
sql "SET use_v3_storage_format = false"
sql "SET default_variant_enable_typed_paths_to_sparse = false"
sql "SET default_variant_sparse_hash_shard_count = 3"
sql "SET enable_rewrite_element_at_to_slot = true"

// Start from a clean table. The table is created with "disable_auto_compaction" = "true";
// presumably so that compaction only happens when it is triggered explicitly below.
sql "DROP TABLE IF EXISTS test_variant_empty_key_sparse_bucket"
sql """
CREATE TABLE test_variant_empty_key_sparse_bucket (
k bigint,
v variant
)
DUPLICATE KEY(`k`)
DISTRIBUTED BY HASH(k) BUCKETS 1
properties("replication_num" = "1", "disable_auto_compaction" = "true");
"""

// This first row is removed again by the TRUNCATE that immediately follows it.
sql """INSERT INTO test_variant_empty_key_sparse_bucket VALUES (1, '{"中文" : "中文", "\\\\u4E2C\\\\u6587": "unicode"}')"""
sql "TRUNCATE TABLE test_variant_empty_key_sparse_bucket"
// Rows inserted while the variant column is still named v: one with an empty key
// mapped to an empty string, three without an empty key.
sql """INSERT INTO test_variant_empty_key_sparse_bucket VALUES (3, '{"": ""}')"""
sql """INSERT INTO test_variant_empty_key_sparse_bucket VALUES (4, '{"!@#^&*()": "11111"}')"""
sql """INSERT INTO test_variant_empty_key_sparse_bucket VALUES (5, '{"123": "456", "789": "012"}')"""
sql """INSERT INTO test_variant_empty_key_sparse_bucket VALUES (7, '{"AA": "UPPER CASE", "aa": "lower case"}')"""
// Rename the variant column; the final query refers to it as Tags.
sql "ALTER TABLE test_variant_empty_key_sparse_bucket RENAME COLUMN v Tags"
// Rows inserted after the rename: one without an empty key, then eight with an
// empty key whose values are a mix of strings and integers.
sql """INSERT INTO test_variant_empty_key_sparse_bucket VALUES (1, '{"tag_key1" : 123456}')"""
sql """INSERT INTO test_variant_empty_key_sparse_bucket VALUES (7, '{"": "UPPER CASE"}')"""
sql """
INSERT INTO test_variant_empty_key_sparse_bucket VALUES
(7, '{"":16,"OpenCapStatus":0,"AccStatus":1,"AccTimeSum":481,"LowVoltage":0,"TowedStatus":0,"EncryptLng":117.23572361077638,"deviceId":"A1100614808888"}')
"""
sql """INSERT INTO test_variant_empty_key_sparse_bucket VALUES (7, '{"": ""}')"""
sql """INSERT INTO test_variant_empty_key_sparse_bucket VALUES (7, '{"": "dkdkdkdkdkd"}')"""
sql """INSERT INTO test_variant_empty_key_sparse_bucket VALUES (7, '{"": "xmxxmmmmmm"}')"""
sql """INSERT INTO test_variant_empty_key_sparse_bucket VALUES (7, '{"": "ooaoaaaaaaa"}')"""
sql """INSERT INTO test_variant_empty_key_sparse_bucket VALUES (7, '{"": 1234566}')"""
sql """INSERT INTO test_variant_empty_key_sparse_bucket VALUES (7, '{"": 8888888}')"""

// Run a cumulative compaction on the table before querying.
// NOTE(review): trigger_and_wait_compaction is defined outside this file; by its name
// it blocks until the compaction has finished — confirm.
trigger_and_wait_compaction("test_variant_empty_key_sparse_bucket", "cumulative")

// Read the empty-key value of every row, ordered by its string form. Of the 13 rows
// present at this point, 4 have no empty key and 2 map it to an empty string.
// NOTE(review): the select list casts to text while ORDER BY casts to string —
// confirm the two type names are equivalent here.
// NOTE(review): presumably the result is compared with the recorded
// `empty_key_values` section of the suite's .out file — confirm.
qt_empty_key_values """
SELECT cast(Tags[''] as text)
FROM test_variant_empty_key_sparse_bucket
ORDER BY cast(Tags[''] as string)
"""
}
Loading