Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: explicitly specify column groups for storage v2 api #39790

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions internal/core/src/common/type_c.h
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,7 @@ typedef struct CStorageConfig {
bool useVirtualHost;
int64_t requestTimeoutMs;
const char* gcp_credential_json;
bool use_custom_part_upload;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is there any chance that use_custom_part_upload can be false? We can remove it from StorageConfig if it is actually not configurable.

} CStorageConfig;

typedef struct CMmapConfig {
Expand Down
74 changes: 74 additions & 0 deletions internal/core/src/segcore/arrow_fs_c.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "segcore/arrow_fs_c.h"
#include "milvus-storage/filesystem/fs.h"
#include "common/EasyAssert.h"
#include "common/type_c.h"

CStatus
InitLocalArrowFileSystemSingleton(const char* c_path) {
try {
std::string path(c_path);
milvus_storage::ArrowFileSystemConfig conf;
conf.root_path = path;
conf.storage_type = "local";
milvus_storage::ArrowFileSystemSingleton::GetInstance().Init(conf);

return milvus::SuccessCStatus();
} catch (std::exception& e) {
return milvus::FailureCStatus(&e);
}
}

void
CleanLocalArrowFileSystemSingleton() {
milvus_storage::ArrowFileSystemSingleton::GetInstance().Release();
}
Comment on lines +38 to +40
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No need to have two separate CleanXXX functions


CStatus
InitRemoteArrowFileSystemSingleton(CStorageConfig c_storage_config) {
try {
milvus_storage::ArrowFileSystemConfig conf;
conf.address = std::string(c_storage_config.address);
conf.bucket_name = std::string(c_storage_config.bucket_name);
conf.access_key_id = std::string(c_storage_config.access_key_id);
conf.access_key_value = std::string(c_storage_config.access_key_value);
conf.root_path = std::string(c_storage_config.root_path);
conf.storage_type = std::string(c_storage_config.storage_type);
conf.cloud_provider = std::string(c_storage_config.cloud_provider);
conf.iam_endpoint = std::string(c_storage_config.iam_endpoint);
conf.log_level = std::string(c_storage_config.log_level);
conf.region = std::string(c_storage_config.region);
conf.useSSL = c_storage_config.useSSL;
conf.sslCACert = std::string(c_storage_config.sslCACert);
conf.useIAM = c_storage_config.useIAM;
conf.useVirtualHost = c_storage_config.useVirtualHost;
conf.requestTimeoutMs = c_storage_config.requestTimeoutMs;
conf.gcp_credential_json = std::string(c_storage_config.gcp_credential_json);
conf.use_custom_part_upload = c_storage_config.use_custom_part_upload;
milvus_storage::ArrowFileSystemSingleton::GetInstance().Init(conf);

return milvus::SuccessCStatus();
} catch (std::exception& e) {
return milvus::FailureCStatus(&e);
}
}

void
CleanRemoteArrowFileSystemSingleton() {
milvus_storage::ArrowFileSystemSingleton::GetInstance().Release();
}
40 changes: 40 additions & 0 deletions internal/core/src/segcore/arrow_fs_c.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
// Licensed to the LF AI & Data foundation under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include "common/type_c.h"

#ifdef __cplusplus
extern "C" {
#endif

CStatus
InitLocalArrowFileSystemSingleton(const char* c_path);

void
CleanLocalArrowFileSystemSingleton();

CStatus
InitRemoteArrowFileSystemSingleton(CStorageConfig c_storage_config);

void
CleanRemoteArrowFileSystemSingleton();


#ifdef __cplusplus
}
#endif
47 changes: 47 additions & 0 deletions internal/core/src/segcore/column_groups_c.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
// Copyright 2025 Zilliz
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include "segcore/column_groups_c.h"
#include <vector>
#include <string>
#include <memory>

using VecVecInt = std::vector<std::vector<int>>;

extern "C" {

CColumnGroups NewCColumnGroups() {
auto vv = std::make_unique<VecVecInt>();
return vv.release();
}

void AddCColumnGroup(CColumnGroups cgs, int* group, int group_size) {
if (!cgs || !group)
return;

auto vv = static_cast<VecVecInt*>(cgs);
std::vector<int> new_group(group, group + group_size);
vv->emplace_back(std::move(new_group));
}

int CColumnGroupsSize(CColumnGroups cgs) {
if (!cgs)
return 0;

auto vv = static_cast<VecVecInt*>(cgs);
return static_cast<int>(vv->size());
}

void FreeCColumnGroups(CColumnGroups cgs) { delete static_cast<VecVecInt*>(cgs); }
}
35 changes: 35 additions & 0 deletions internal/core/src/segcore/column_groups_c.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
// Copyright 2025 Zilliz
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#pragma once

#include <stdint.h>

#ifdef __cplusplus
extern "C" {
#endif

typedef void* CColumnGroups;

CColumnGroups NewCColumnGroups();

void AddCColumnGroup(CColumnGroups cgs, int* group, int group_size);

int CColumnGroupsSize(CColumnGroups cgs);

void FreeCColumnGroups(CColumnGroups cgs);

#ifdef __cplusplus
}
#endif
44 changes: 24 additions & 20 deletions internal/core/src/segcore/packed_reader_c.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -22,33 +22,37 @@
#include <arrow/filesystem/filesystem.h>
#include <arrow/status.h>
#include <memory>
#include "common/EasyAssert.h"
#include "common/type_c.h"

int
NewPackedReader(const char* path,

CStatus
NewPackedReader(char** paths,
int64_t num_paths,
struct ArrowSchema* schema,
const int64_t buffer_size,
CPackedReader* c_packed_reader) {
try {
auto truePath = std::string(path);
auto factory = std::make_shared<milvus_storage::FileSystemFactory>();
auto conf = milvus_storage::StorageConfig();
conf.uri = "file:///tmp/";
auto trueFs = factory->BuildFileSystem(conf, &truePath).value();
auto truePaths = std::vector<std::string>(paths, paths + num_paths);
auto trueFs = milvus_storage::ArrowFileSystemSingleton::GetInstance().GetArrowFileSystem();
if (!trueFs) {
return milvus::FailureCStatus(milvus::ErrorCode::FileReadFailed, "Failed to get filesystem");
}
auto trueSchema = arrow::ImportSchema(schema).ValueOrDie();
std::set<int> needed_columns;
for (int i = 0; i < trueSchema->num_fields(); i++) {
needed_columns.emplace(i);
}
auto reader = std::make_unique<milvus_storage::PackedRecordBatchReader>(
*trueFs, path, trueSchema, needed_columns, buffer_size);
trueFs, truePaths, trueSchema, needed_columns, buffer_size);
*c_packed_reader = reader.release();
return 0;
return milvus::SuccessCStatus();
} catch (std::exception& e) {
return -1;
return milvus::FailureCStatus(&e);
}
}

int
CStatus
ReadNext(CPackedReader c_packed_reader,
CArrowArray* out_array,
CArrowSchema* out_schema) {
Expand All @@ -59,39 +63,39 @@ ReadNext(CPackedReader c_packed_reader,
std::shared_ptr<arrow::RecordBatch> record_batch;
auto status = packed_reader->ReadNext(&record_batch);
if (!status.ok()) {
return -1;
return milvus::FailureCStatus(milvus::ErrorCode::FileReadFailed, status.ToString());
}
if (record_batch == nullptr) {
// end of file
return 0;
return milvus::SuccessCStatus();
} else {
std::unique_ptr<ArrowArray> arr = std::make_unique<ArrowArray>();
std::unique_ptr<ArrowSchema> schema =
std::make_unique<ArrowSchema>();
auto status = arrow::ExportRecordBatch(
*record_batch, arr.get(), schema.get());
if (!status.ok()) {
return -1;
return milvus::FailureCStatus(milvus::ErrorCode::FileReadFailed, status.ToString());
}
*out_array = arr.release();
*out_schema = schema.release();
return 0;
return milvus::SuccessCStatus();
}
return 0;
return milvus::SuccessCStatus();
} catch (std::exception& e) {
return -1;
return milvus::FailureCStatus(&e);
}
}

int
CStatus
CloseReader(CPackedReader c_packed_reader) {
try {
auto packed_reader =
static_cast<milvus_storage::PackedRecordBatchReader*>(
c_packed_reader);
delete packed_reader;
return 0;
return milvus::SuccessCStatus();
} catch (std::exception& e) {
return -1;
return milvus::FailureCStatus(&e);
}
}
10 changes: 6 additions & 4 deletions internal/core/src/segcore/packed_reader_c.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
extern "C" {
#endif

#include "common/type_c.h"
#include <arrow/c/abi.h>

typedef void* CPackedReader;
Expand All @@ -32,8 +33,9 @@ typedef void* CArrowSchema;
* @param buffer_size The max buffer size of the packed reader.
* @param c_packed_reader The output pointer of the packed reader.
*/
int
NewPackedReader(const char* path,
CStatus
NewPackedReader(char** paths,
int64_t num_paths,
struct ArrowSchema* schema,
const int64_t buffer_size,
CPackedReader* c_packed_reader);
Expand All @@ -46,7 +48,7 @@ NewPackedReader(const char* path,
* @param out_array The output pointer of the arrow array.
* @param out_schema The output pointer of the arrow schema.
*/
int
CStatus
ReadNext(CPackedReader c_packed_reader,
CArrowArray* out_array,
CArrowSchema* out_schema);
Expand All @@ -56,7 +58,7 @@ ReadNext(CPackedReader c_packed_reader,
*
* @param c_packed_reader The packed reader to close.
*/
int
CStatus
CloseReader(CPackedReader c_packed_reader);

#ifdef __cplusplus
Expand Down
Loading
Loading