Skip to content

Commit d2236c1

Browse files
committed
[C++][Gandiva] Selectively register external C functions based on expression usage
1 parent 13d4108 commit d2236c1

5 files changed

Lines changed: 49 additions & 31 deletions

File tree

cpp/src/gandiva/engine.cc

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -348,10 +348,19 @@ Engine::~Engine() {}
348348

349349
/// \brief Initialize the engine with selective mapping of global functions.
///
/// Stores `function_names` in `used_functions_` and enables
/// `selective_mapping_enabled_` while AddGlobalMappings() runs, so that mapping
/// code which consults those members can skip functions that are not needed.
/// Both members are restored afterwards regardless of the outcome.
///
/// \param[in] function_names names to keep when mapping selectively
///   (presumably the functions referenced by the expressions being compiled --
///   confirm against the caller)
/// \return the status reported by AddGlobalMappings()
///
/// NOTE(review): Engine::Make also calls the no-argument Init(), and both
/// overloads call AddGlobalMappings(); confirm they are never both run on the
/// same engine instance.
Status Engine::Init(std::unordered_set<std::string> function_names) {
  used_functions_ = std::move(function_names);
  selective_mapping_enabled_ = true;
  std::call_once(register_exported_funcs_flag, gandiva::RegisterExportedFuncs);

  // Add mappings for global functions that can be accessed from LLVM/IR module.
  Status mapping_status = AddGlobalMappings();

  // Restore the non-selective state on every path. Returning early on failure
  // would leave the flag set and the name set populated on this engine.
  selective_mapping_enabled_ = false;
  used_functions_.clear();
  return mapping_status;
}
360+
361+
/// \brief Initialize the engine without restricting which functions are mapped.
///
/// Runs the one-time registration of exported functions and then adds the
/// global mappings. This overload does not write `used_functions_` or
/// `selective_mapping_enabled_`.
///
/// \return Status::OK() on success, otherwise the error from AddGlobalMappings()
Status Engine::Init() {
  // Guarded by a once-flag, so the registration runs a single time.
  std::call_once(register_exported_funcs_flag, gandiva::RegisterExportedFuncs);

  const Status global_mappings_status = AddGlobalMappings();
  ARROW_RETURN_NOT_OK(global_mappings_status);
  return Status::OK();
}
357366

@@ -395,6 +404,7 @@ Result<std::unique_ptr<Engine>> Engine::Make(
395404
std::unique_ptr<Engine> engine{
396405
new Engine(conf, std::move(jit), std::move(shared_target_machine), cached)};
397406

407+
ARROW_RETURN_NOT_OK(engine->Init());
398408
return engine;
399409
}
400410

@@ -599,13 +609,6 @@ Result<void*> Engine::CompiledFunction(const std::string& function) {
599609

600610
void Engine::AddGlobalMappingForFunc(const std::string& name, llvm::Type* ret_type,
601611
const std::vector<llvm::Type*>& args, void* func) {
602-
bool is_internal_func =
603-
internal_functions_.find(name) != internal_functions_.end();
604-
605-
if (!(is_internal_func ||
606-
used_functions_.find(name) != used_functions_.end())) {
607-
return;
608-
}
609612
const auto prototype = llvm::FunctionType::get(ret_type, args, /*is_var_arg*/ false);
610613
llvm::Function::Create(prototype, llvm::GlobalValue::ExternalLinkage, name, module());
611614
AddAbsoluteSymbol(*lljit_, name, func);

cpp/src/gandiva/engine.h

Lines changed: 19 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -94,8 +94,11 @@ class GANDIVA_EXPORT Engine {
9494
llvm::Constant* CreateGlobalStringPtr(const std::string& string);
9595

9696
Status Init(std::unordered_set<std::string> function_names);
97+
Status Init();
9798

9899
private:
100+
friend class ExternalCFunctions;
101+
99102
Engine(const std::shared_ptr<Configuration>& conf,
100103
std::unique_ptr<llvm::orc::LLJIT> lljit,
101104
std::shared_ptr<llvm::TargetMachine> target_machine, bool cached);
@@ -127,27 +130,28 @@ class GANDIVA_EXPORT Engine {
127130
std::unordered_set<std::string> used_functions_;
128131

129132
static inline const std::unordered_set<std::string> internal_functions_ = {
130-
"gdv_fn_context_arena_malloc",
131-
"gdv_fn_context_set_error_msg",
132-
"gdv_fn_populate_varlen_vector",
133-
"gdv_fn_context_arena_reset",
134-
"gdv_fn_in_expr_lookup_int32",
135-
"gdv_fn_in_expr_lookup_int64",
136-
"gdv_fn_in_expr_lookup_float",
137-
"gdv_fn_in_expr_lookup_double",
138-
"gdv_fn_in_expr_lookup_decimal",
139-
"gdv_fn_in_expr_lookup_utf8",
140-
141-
"bitMapGetBit",
142-
"bitMapSetBit",
143-
"bitMapValidityGetBit",
144-
"bitMapClearBitIfFalse",
133+
"gdv_fn_context_arena_malloc",
134+
"gdv_fn_context_set_error_msg",
135+
"gdv_fn_populate_varlen_vector",
136+
"gdv_fn_context_arena_reset",
137+
"gdv_fn_in_expr_lookup_int32",
138+
"gdv_fn_in_expr_lookup_int64",
139+
"gdv_fn_in_expr_lookup_float",
140+
"gdv_fn_in_expr_lookup_double",
141+
"gdv_fn_in_expr_lookup_decimal",
142+
"gdv_fn_in_expr_lookup_utf8",
143+
144+
"bitMapGetBit",
145+
"bitMapSetBit",
146+
"bitMapValidityGetBit",
147+
"bitMapClearBitIfFalse",
145148
};
146149

147150
bool optimize_ = true;
148151
bool module_finalized_ = false;
149152
bool cached_;
150153
bool functions_loaded_ = false;
154+
bool selective_mapping_enabled_ = false;
151155
std::shared_ptr<FunctionRegistry> function_registry_;
152156
std::string module_ir_;
153157
// The lifetime of the TargetMachine is shared with LLJIT. This prevents unnecessary

cpp/src/gandiva/external_c_functions.cc

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,21 @@ namespace gandiva {
6767
Status ExternalCFunctions::AddMappings(Engine* engine) const {
6868
const auto& c_funcs = function_registry_->GetCFunctions();
6969
const auto types = engine->types();
70+
71+
// Build allowed set ONCE before the loop
72+
std::unordered_set<std::string> allowed;
73+
if (engine->selective_mapping_enabled_) {
74+
allowed = engine->internal_functions_;
75+
allowed.insert(engine->used_functions_.begin(), engine->used_functions_.end());
76+
}
77+
7078
for (auto& [func, func_ptr] : c_funcs) {
79+
const std::string& name = func.pc_name();
80+
81+
if (engine->selective_mapping_enabled_ && !allowed.contains(name)) {
82+
continue;
83+
}
84+
7185
for (const auto& sig : func.signatures()) {
7286
ARROW_ASSIGN_OR_RAISE(auto llvm_signature, MapToLLVMSignature(sig, func, types));
7387
auto& [args, ret_llvm_type] = llvm_signature;

cpp/src/gandiva/llvm_generator.cc

Lines changed: 5 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -67,23 +67,20 @@ Status LLVMGenerator::SetLLVMObjectCache(GandivaObjectCache& object_cache) {
6767
return engine_->SetLLVMObjectCache(object_cache);
6868
}
6969

70-
/// \brief Decompose an expression into its value/validity pair.
///
/// Runs an ExprDecomposer over the root of `expr` and records every function
/// name the decomposer reports as used into `functions_in_exprs_`.
///
/// \param[in] expr expression whose root node is decomposed
/// \return the decomposed value/validity pair, or the decomposer's error status
arrow::Result<ValueValidityPairPtr> LLVMGenerator::Decompose(const ExpressionPtr& expr) {
  ExprDecomposer expr_decomposer(*function_registry_, annotator_);

  ValueValidityPairPtr decomposed;
  ARROW_RETURN_NOT_OK(expr_decomposer.Decompose(*expr->root(), &decomposed));

  // Accumulate the functions this expression uses alongside those already seen.
  for (const auto& function_name : expr_decomposer.UsedFunctions()) {
    functions_in_exprs_.insert(function_name);
  }

  return decomposed;
}
8581

86-
Status LLVMGenerator::Add(const ExpressionPtr expr, ValueValidityPairPtr value_validity, const FieldDescriptorPtr output) {
82+
Status LLVMGenerator::Add(const ExpressionPtr expr, ValueValidityPairPtr value_validity,
83+
const FieldDescriptorPtr output) {
8784
int idx = static_cast<int>(compiled_exprs_.size());
8885
// Generate the IR function for the decomposed expression.
8986
auto compiled_expr = std::make_unique<CompiledExpr>(value_validity, output);

cpp/src/gandiva/llvm_generator.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -194,7 +194,7 @@ class GANDIVA_EXPORT LLVMGenerator {
194194

195195
// Generate the code for one expression for default mode, with the output of
196196
// the expression going to 'output'.
197-
Status Add(const ExpressionPtr expr, ValueValidityPairPtr value_validity,
197+
Status Add(const ExpressionPtr expr, ValueValidityPairPtr value_validity,
198198
const FieldDescriptorPtr output);
199199

200200
/// Generate code to load the vector at specified index in the 'arg_addrs' array.

0 commit comments

Comments
 (0)