@@ -2460,18 +2460,18 @@ struct SafeLengthState {
24602460};
24612461
24622462// Helper to safely add a word length
2463- static inline bool safe_accumulate_word (SafeLengthState* state, int32_t word_len,
2463+ static inline bool safe_accumulate_word (SafeLengthState& state, int32_t word_len,
24642464 bool word_validity) {
2465- if (! word_validity) return true ;
2465+ if (not word_validity) return true ;
24662466
24672467 int32_t temp = 0 ;
24682468 if (ARROW_PREDICT_FALSE (
2469- arrow::internal::AddWithOverflow (state-> total_len , word_len, &temp))) {
2470- state-> overflow = true ;
2469+ arrow::internal::AddWithOverflow (state. total_len , word_len, &temp))) {
2470+ state. overflow = true ;
24712471 return false ;
24722472 }
2473- state-> total_len = temp;
2474- state-> num_valid ++;
2473+ state. total_len = temp;
2474+ state. num_valid ++;
24752475 return true ;
24762476}
24772477
@@ -2512,121 +2512,97 @@ static inline const char* handle_empty_result(bool* out_valid, int32_t* out_len)
25122512 return " " ;
25132513}
25142514
2515- FORCE_INLINE
2516- const char * concat_ws_utf8_utf8 (int64_t context, const char * separator,
2517- int32_t separator_len, bool separator_validity,
2518- const char * word1, int32_t word1_len, bool word1_validity,
2519- const char * word2, int32_t word2_len, bool word2_validity,
2520- bool * out_valid, int32_t * out_len) {
// One input word handed to concat_ws_impl: a (pointer, length, validity) triple.
// The struct only borrows `data`; it never owns or frees the bytes.
struct WordArg {
  const char* data;  // start of the word's bytes
  int32_t len;       // number of bytes at `data`
  bool valid;        // false when the word is null
};
2520+
2521+ static inline const char * concat_ws_impl (int64_t context, const char * separator,
2522+ int32_t separator_len, bool separator_validity,
2523+ bool * out_valid, int32_t * out_len,
2524+ std::initializer_list<WordArg> words) {
25212525 *out_len = 0 ;
2522- // If separator is null, always return null
2523- if (!separator_validity) {
2524- *out_len = 0 ;
2526+
2527+ // Separator validity check
2528+ if ( not separator_validity) {
25252529 *out_valid = false ;
25262530 return " " ;
25272531 }
25282532
2529- // If separator is null, always return null
2530- if (!separator_validity) {
2531- return handle_overflow_failure (out_valid, out_len);
2532- }
2533-
25342533 SafeLengthState state;
25352534
2536- // Accumulate word lengths safely
2537- safe_accumulate_word (&state, word1_len, word1_validity);
2538- safe_accumulate_word (&state, word2_len, word2_validity);
2539-
2540- if (state.overflow ) {
2541- return handle_overflow_failure (out_valid, out_len);
2535+ // Accumulate all word lengths safely
2536+ for (const WordArg& w : words) {
2537+ safe_accumulate_word (state, w.len , w.valid );
2538+ if (state.overflow ) {
2539+ *out_valid = false ;
2540+ *out_len = 0 ;
2541+ return " " ;
2542+ }
25422543 }
25432544
25442545 // Add separator lengths
2545- if (!safe_add_separators (&state, separator_len)) {
2546- return handle_overflow_failure (out_valid, out_len);
2546+ if (not safe_add_separators (&state, separator_len)) {
2547+ *out_valid = false ;
2548+ *out_len = 0 ;
2549+ return " " ;
25472550 }
25482551
2549- // Handle case with no valid words
2552+ // Empty result
25502553 if (state.total_len == 0 ) {
2551- return handle_empty_result (out_valid, out_len);
2554+ *out_valid = true ;
2555+ *out_len = 0 ;
2556+ return " " ;
25522557 }
25532558
2554- // Allocate and concatenate
2559+ // Allocate memory
25552560 char * out =
25562561 reinterpret_cast <char *>(gdv_fn_context_arena_malloc (context, state.total_len ));
25572562 if (out == nullptr ) {
25582563 gdv_fn_context_set_error_msg (context, " Could not allocate memory for output string" );
2559- *out_len = 0 ;
25602564 *out_valid = false ;
2565+ *out_len = 0 ;
25612566 return " " ;
25622567 }
25632568
2569+ // Concatenate all words
25642570 char * tmp = out;
25652571 int out_idx = 0 ;
25662572 bool seenAnyValidInput = false ;
25672573
2568- concat_word (tmp, &out_idx, word1, word1_len, word1_validity, separator, separator_len,
2569- &seenAnyValidInput);
2570- concat_word (tmp, &out_idx, word2, word2_len, word2_validity, separator, separator_len,
2571- &seenAnyValidInput);
2574+ for ( const WordArg& w : words) {
2575+ concat_word (tmp, &out_idx, w. data , w. len , w. valid , separator, separator_len,
2576+ &seenAnyValidInput);
2577+ }
25722578
25732579 *out_valid = true ;
25742580 *out_len = out_idx;
25752581 return out;
25762582}
25772583
2584+ FORCE_INLINE
2585+ const char * concat_ws_utf8_utf8 (int64_t context, const char * separator,
2586+ int32_t separator_len, bool separator_validity,
2587+ const char * word1, int32_t word1_len, bool word1_validity,
2588+ const char * word2, int32_t word2_len, bool word2_validity,
2589+ bool * out_valid, int32_t * out_len) {
2590+ return concat_ws_impl (
2591+ context, separator, separator_len, separator_validity, out_valid, out_len,
2592+ {{word1, word1_len, word1_validity}, {word2, word2_len, word2_validity}});
2593+ }
2594+
25782595FORCE_INLINE
25792596const char * concat_ws_utf8_utf8_utf8 (
25802597 int64_t context, const char * separator, int32_t separator_len,
25812598 bool separator_validity, const char * word1, int32_t word1_len, bool word1_validity,
25822599 const char * word2, int32_t word2_len, bool word2_validity, const char * word3,
25832600 int32_t word3_len, bool word3_validity, bool * out_valid, int32_t * out_len) {
2584- *out_len = 0 ;
2585- if (!separator_validity) {
2586- return handle_overflow_failure (out_valid, out_len);
2587- }
2588-
2589- SafeLengthState state;
2590-
2591- safe_accumulate_word (&state, word1_len, word1_validity);
2592- safe_accumulate_word (&state, word2_len, word2_validity);
2593- safe_accumulate_word (&state, word3_len, word3_validity);
2594-
2595- if (state.overflow ) {
2596- return handle_overflow_failure (out_valid, out_len);
2597- }
2598-
2599- if (!safe_add_separators (&state, separator_len)) {
2600- return handle_overflow_failure (out_valid, out_len);
2601- }
2602-
2603- if (state.total_len == 0 ) {
2604- return handle_empty_result (out_valid, out_len);
2605- }
2606-
2607- char * out =
2608- reinterpret_cast <char *>(gdv_fn_context_arena_malloc (context, state.total_len ));
2609- if (out == nullptr ) {
2610- gdv_fn_context_set_error_msg (context, " Could not allocate memory for output string" );
2611- *out_len = 0 ;
2612- *out_valid = false ;
2613- return " " ;
2614- }
2615-
2616- char * tmp = out;
2617- int out_idx = 0 ;
2618- bool seenAnyValidInput = false ;
2619-
2620- concat_word (tmp, &out_idx, word1, word1_len, word1_validity, separator, separator_len,
2621- &seenAnyValidInput);
2622- concat_word (tmp, &out_idx, word2, word2_len, word2_validity, separator, separator_len,
2623- &seenAnyValidInput);
2624- concat_word (tmp, &out_idx, word3, word3_len, word3_validity, separator, separator_len,
2625- &seenAnyValidInput);
2626-
2627- *out_valid = true ;
2628- *out_len = out_idx;
2629- return out;
2601+ return concat_ws_impl (context, separator, separator_len, separator_validity, out_valid,
2602+ out_len,
2603+ {{word1, word1_len, word1_validity},
2604+ {word2, word2_len, word2_validity},
2605+ {word3, word3_len, word3_validity}});
26302606}
26312607
26322608FORCE_INLINE
@@ -2636,68 +2612,12 @@ const char* concat_ws_utf8_utf8_utf8_utf8(
26362612 const char * word2, int32_t word2_len, bool word2_validity, const char * word3,
26372613 int32_t word3_len, bool word3_validity, const char * word4, int32_t word4_len,
26382614 bool word4_validity, bool * out_valid, int32_t * out_len) {
2639- *out_len = 0 ;
2640- // If separator is null, always return null
2641- if (!separator_validity) {
2642- *out_len = 0 ;
2643- *out_valid = false ;
2644- return " " ;
2645- }
2646-
2647- SafeLengthState state;
2648-
2649- // Accumulate all word lengths with overflow checking
2650- safe_accumulate_word (&state, word1_len, word1_validity);
2651- safe_accumulate_word (&state, word2_len, word2_validity);
2652- safe_accumulate_word (&state, word3_len, word3_validity);
2653- safe_accumulate_word (&state, word4_len, word4_validity);
2654-
2655- if (state.overflow ) {
2656- *out_len = 0 ;
2657- *out_valid = false ;
2658- return " " ;
2659- }
2660-
2661- // Add separator lengths with overflow checking
2662- if (!safe_add_separators (&state, separator_len)) {
2663- *out_len = 0 ;
2664- *out_valid = false ;
2665- return " " ;
2666- }
2667-
2668- // Handle case with no valid words
2669- if (state.total_len == 0 ) {
2670- *out_len = 0 ;
2671- *out_valid = true ;
2672- return " " ;
2673- }
2674-
2675- // Allocate memory
2676- char * out =
2677- reinterpret_cast <char *>(gdv_fn_context_arena_malloc (context, state.total_len ));
2678- if (out == nullptr ) {
2679- gdv_fn_context_set_error_msg (context, " Could not allocate memory for output string" );
2680- *out_valid = false ;
2681- *out_len = 0 ;
2682- return " " ;
2683- }
2684-
2685- char * tmp = out;
2686- int out_idx = 0 ;
2687- bool seenAnyValidInput = false ;
2688-
2689- concat_word (tmp, &out_idx, word1, word1_len, word1_validity, separator, separator_len,
2690- &seenAnyValidInput);
2691- concat_word (tmp, &out_idx, word2, word2_len, word2_validity, separator, separator_len,
2692- &seenAnyValidInput);
2693- concat_word (tmp, &out_idx, word3, word3_len, word3_validity, separator, separator_len,
2694- &seenAnyValidInput);
2695- concat_word (tmp, &out_idx, word4, word4_len, word4_validity, separator, separator_len,
2696- &seenAnyValidInput);
2697-
2698- *out_valid = true ;
2699- *out_len = out_idx;
2700- return out;
2615+ return concat_ws_impl (context, separator, separator_len, separator_validity, out_valid,
2616+ out_len,
2617+ {{word1, word1_len, word1_validity},
2618+ {word2, word2_len, word2_validity},
2619+ {word3, word3_len, word3_validity},
2620+ {word4, word4_len, word4_validity}});
27012621}
27022622
27032623FORCE_INLINE
@@ -2708,71 +2628,13 @@ const char* concat_ws_utf8_utf8_utf8_utf8_utf8(
27082628 int32_t word3_len, bool word3_validity, const char * word4, int32_t word4_len,
27092629 bool word4_validity, const char * word5, int32_t word5_len, bool word5_validity,
27102630 bool * out_valid, int32_t * out_len) {
2711- *out_len = 0 ;
2712- // If separator is null, always return null
2713- if (!separator_validity) {
2714- *out_len = 0 ;
2715- *out_valid = false ;
2716- return " " ;
2717- }
2718-
2719- SafeLengthState state;
2720-
2721- // Accumulate all word lengths with overflow checking
2722- safe_accumulate_word (&state, word1_len, word1_validity);
2723- safe_accumulate_word (&state, word2_len, word2_validity);
2724- safe_accumulate_word (&state, word3_len, word3_validity);
2725- safe_accumulate_word (&state, word4_len, word4_validity);
2726- safe_accumulate_word (&state, word5_len, word5_validity);
2727-
2728- if (state.overflow ) {
2729- *out_len = 0 ;
2730- *out_valid = false ;
2731- return " " ;
2732- }
2733-
2734- // Add separator lengths with overflow checking
2735- if (!safe_add_separators (&state, separator_len)) {
2736- *out_len = 0 ;
2737- *out_valid = false ;
2738- return " " ;
2739- }
2740-
2741- // Handle case with no valid words
2742- if (state.total_len == 0 ) {
2743- *out_len = 0 ;
2744- *out_valid = true ;
2745- return " " ;
2746- }
2747-
2748- // Allocate memory
2749- char * out =
2750- reinterpret_cast <char *>(gdv_fn_context_arena_malloc (context, state.total_len ));
2751- if (out == nullptr ) {
2752- gdv_fn_context_set_error_msg (context, " Could not allocate memory for output string" );
2753- *out_len = 0 ;
2754- *out_valid = false ;
2755- return " " ;
2756- }
2757-
2758- char * tmp = out;
2759- int out_idx = 0 ;
2760- bool seenAnyValidInput = false ;
2761-
2762- concat_word (tmp, &out_idx, word1, word1_len, word1_validity, separator, separator_len,
2763- &seenAnyValidInput);
2764- concat_word (tmp, &out_idx, word2, word2_len, word2_validity, separator, separator_len,
2765- &seenAnyValidInput);
2766- concat_word (tmp, &out_idx, word3, word3_len, word3_validity, separator, separator_len,
2767- &seenAnyValidInput);
2768- concat_word (tmp, &out_idx, word4, word4_len, word4_validity, separator, separator_len,
2769- &seenAnyValidInput);
2770- concat_word (tmp, &out_idx, word5, word5_len, word5_validity, separator, separator_len,
2771- &seenAnyValidInput);
2772-
2773- *out_valid = true ;
2774- *out_len = out_idx;
2775- return out;
2631+ return concat_ws_impl (context, separator, separator_len, separator_validity, out_valid,
2632+ out_len,
2633+ {{word1, word1_len, word1_validity},
2634+ {word2, word2_len, word2_validity},
2635+ {word3, word3_len, word3_validity},
2636+ {word4, word4_len, word4_validity},
2637+ {word5, word5_len, word5_validity}});
27762638}
27772639
27782640FORCE_INLINE
0 commit comments