From 709c07fec19654a0e11c8b85c9231afe67ec0a47 Mon Sep 17 00:00:00 2001 From: "David E. Wheeler" Date: Mon, 15 Dec 2025 18:38:23 -0500 Subject: [PATCH] Add kv_list and refactor session_settings with it A new structure, kv_list, represents a list of key/value pairs. It does not use an array of structs for each pair, because a custom GUC value must be allocated as a single block, The `kv_list` constructor takes a Postgres `List` argument, iterates over the keys and values, and then allocates a block of memory for them (with options to use `palloc`, `malloc`, or `guc_malloc` (which falls back on `malloc` prior to Postgrs 16)). It then iterates over them again to pack them into the memory space beyond the `kv_list` struct itself. The design follows the precedent of the `ConvertTimeZoneAbbrevs()` function in Postgres's `datetime.c`. Use it in `chfdw_check_settings_guc()` to create a `kv_list` from the `List` returned by `chfdw_parse_options()` and assign it to `**extra`. Postgres hands this pointer to `chfdw_settings_assign_hook()`, where we assign it to the `ch_session_settings_list` global, which can then be fetched via `chfdw_get_session_settings()`. Of course this would be a tricky interface to iterate manually, so also add another struct, `kv_iter`, that handles the logic of iterating through the keys and values in a `kv_list`. Since it sets up the values for each iteration of the loop, the logic using it is simpler than the previous use of `List`, as seen in the http and binary engines. While at it, cChange `chfdw_parse_options()` to use `pstrdup()` instead of `strdup`, since its memory is handled by a Postgres memory context (and surely was leaking before). --- src/binary.cpp | 24 +++------ src/http.c | 8 ++- src/include/engine.h | 10 ++-- src/include/fdw.h | 3 +- src/include/kv_list.h | 60 ++++++++++++++++++++++ src/kv_list.c | 117 ++++++++++++++++++++++++++++++++++++++++++ src/option.c | 60 +++++++++++++++++----- 7 files changed, 242 insertions(+), 40 deletions(-) create mode 100644 src/include/kv_list.h create mode 100644 src/kv_list.c diff --git a/src/binary.cpp b/src/binary.cpp index f392da6..fa03a78 100644 --- a/src/binary.cpp +++ b/src/binary.cpp @@ -173,23 +173,15 @@ static void set_resp_error(ch_binary_response_t * resp, const char * str) */ static QuerySettings ch_binary_settings(const ch_query *query) { - ListCell *lc; - auto res = QuerySettings{}; - foreach (lc, (List *) query->settings) - { - /* - * foreach reads a non-const, so we have to cast. Would be nice to use - * foreach_ptr: - * - * foreach_ptr(DefElem, setting, query->settings) - * - * But it's only available in Postgres 17 and later. - */ - DefElem *setting = (DefElem *) lfirst(lc); - res.insert_or_assign(setting->defname, QuerySettingsField{strVal(setting->arg), 1}); - } + kv_iter iter; + auto res = QuerySettings{}; - return res; + for (iter = new_kv_iter(query->settings); !kv_iter_done(&iter); kv_iter_next(&iter)) + { + res.insert_or_assign(iter.name, QuerySettingsField{iter.value, 1}); + } + + return res; } static void set_state_error(ch_binary_read_state_t * state, const char * str) diff --git a/src/http.c b/src/http.c index c67097f..801d8a7 100644 --- a/src/http.c +++ b/src/http.c @@ -158,8 +158,7 @@ ch_http_simple_query(ch_http_connection_t * conn, const ch_query * query) static char errbuffer[CURL_ERROR_SIZE]; struct curl_slist *headers = NULL; CURLU *cu = curl_url(); - ListCell *lc; - DefElem *setting; + kv_iter iter; char *buf = NULL; ch_http_response_t *resp = calloc(sizeof(ch_http_response_t), 1); @@ -178,10 +177,9 @@ ch_http_simple_query(ch_http_connection_t * conn, const ch_query * query) pfree(buf); /* Append each of the settings as a query param. */ - foreach(lc, (List *) query->settings) + for (iter = new_kv_iter(query->settings); !kv_iter_done(&iter); kv_iter_next(&iter)) { - setting = (DefElem *) lfirst(lc); - buf = psprintf("%s=%s", setting->defname, strVal(setting->arg)); + buf = psprintf("%s=%s", iter.name, iter.value); curl_url_set(cu, CURLUPART_QUERY, buf, CURLU_APPENDQUERY | CURLU_URLENCODE); pfree(buf); } diff --git a/src/include/engine.h b/src/include/engine.h index 3685376..4600a60 100644 --- a/src/include/engine.h +++ b/src/include/engine.h @@ -1,7 +1,7 @@ #ifndef CLICKHOUSE_ENGINE_H #define CLICKHOUSE_ENGINE_H -#include "nodes/pathnodes.h" +#include "kv_list.h" /* * ch_connection_details defines the details for connecting to ClickHouse. @@ -20,10 +20,10 @@ typedef struct */ typedef struct { - const char *sql; - const List *settings; -} ch_query; + const char *sql; + const kv_list *settings; +} ch_query; -#define new_query(sql) {sql, chfdw_parse_options(ch_session_settings, true, false)} +#define new_query(sql) {sql, chfdw_get_session_settings()} #endif /* CLICKHOUSE_ENGINE_H */ diff --git a/src/include/fdw.h b/src/include/fdw.h index 378bef9..0658d27 100644 --- a/src/include/fdw.h +++ b/src/include/fdw.h @@ -199,7 +199,8 @@ extern void chfdw_report_error(int elevel, ch_connection conn, bool clear, const char *sql); /* in option.c */ -extern char *ch_session_settings; +extern kv_list * chfdw_get_session_settings(void); + extern void chfdw_extract_options(List * defelems, char **driver, char **host, int *port, char **dbname, char **username, char **password); diff --git a/src/include/kv_list.h b/src/include/kv_list.h new file mode 100644 index 0000000..1712df6 --- /dev/null +++ b/src/include/kv_list.h @@ -0,0 +1,60 @@ +#ifndef PG_CLICKHOUSE_KV_LIST_H +#define PG_CLICKHOUSE_KV_LIST_H + +#include +#include "postgres.h" +#include "nodes/pathnodes.h" + +/* + * A simple data structure with a list of key/value string pairs. Use + * new_kv_list_from_list() to create. + */ +typedef struct kv_list +{ + int length; + /* key/value char * pairs follow the length field. */ + char data[]; +} kv_list; + +/* + * Iterator for a kv_list. Use new_kv_iter() to create. + * + * for (kv_iter iter = new_kv_iter(kv); !kv_iter_done(&iter); kv_iter_next(&iter)) + * { + * printf("%i, %s => %s\n", iter.num, iter.name, iter.value); + * } + */ +typedef struct kv_iter { + int togo; + char * name; + char * value; +} kv_iter; + +/* + * Defines the allocator to use when creating a new kv_list. + * kv_pair_guc_malloc is the same as kv_pair_malloc on Postgres 15 and + * earlier, so be sure to free() the memory on those versions. + */ +enum kv_pair_alloc +{ + kv_pair_guc_malloc, + kv_pair_malloc, + kv_pair_palloc, +}; + +/* + * Create a new kv_list from a PostgreSQL List of DefElem. Allocate the memory + * using the specified allocator. +*/ +kv_list * new_kv_list_from_pg_list(List * list, int allocate); + +/* Create a new kv_iter for a key_pairs. */ +kv_iter new_kv_iter(const kv_list * ns); + +/* Iterate to the next item. Returns false if there are no items. */ +bool kv_iter_next(kv_iter * state); + +/* Retruns true if iteration by kv_iter is complete. */ +bool kv_iter_done(kv_iter * state); + +#endif /* PG_CLICKHOUSE_KV_LIST_H */ diff --git a/src/kv_list.c b/src/kv_list.c new file mode 100644 index 0000000..34999cf --- /dev/null +++ b/src/kv_list.c @@ -0,0 +1,117 @@ +#include +#include "kv_list.h" +#include "utils/elog.h" +#include "nodes/pathnodes.h" +#include "utils/guc.h" + +static kv_list * allocate_list(size_t size, int allocate) +{ + kv_list *pairs; + + switch (allocate) + { + case kv_pair_guc_malloc: + /* Fall through to malloc prior to Postgres 16. */ +#if PG_VERSION_NUM >= 160000 + pairs = guc_malloc(ERROR, size); + break; +#endif + case kv_pair_malloc: + pairs = malloc(size); + break; + case kv_pair_palloc: + pairs = palloc(size); + break; + default: + ereport(ERROR, + (errcode(ERRCODE_FDW_ERROR), + errmsg("unknown kv_pair_alloc %i", allocate))); + } + + if (!pairs) + ereport(ERROR, + (errcode(ERRCODE_FDW_OUT_OF_MEMORY), + errmsg("out of memory"))); + + return pairs; +} + +extern kv_list * new_kv_list_from_pg_list(List * list, int allocate) +{ + ListCell *lc; + DefElem *elem; + kv_list *pairs; + size_t kv_size = 0; + size_t ck_size = 0; + + /* Count up space for dynamic key/value pairs. */ + foreach(lc, list) + { + elem = (DefElem *) lfirst(lc); + kv_size += 2 + strlen(elem->defname) + strlen(strVal(elem->arg)); + } + + /* Alloc the result ... */ + pairs = allocate_list(offsetof(kv_list, data) + kv_size, allocate); + + /* ... and fill it in */ + pairs->length = list_length(list); + + /* In this loop, size ck_size reprises the kv_size calculation above. */ + foreach(lc, list) + { + char *str; + + /* Append the element name and arg that constitute the pair. */ + elem = (DefElem *) lfirst(lc); + str = (char *) pairs->data + ck_size; + strcpy(str, elem->defname); + ck_size += strlen(str) + 1; + str = (char *) pairs->data + ck_size; + strcpy(str, strVal(elem->arg)); + ck_size += strlen(str) + 1; + } + + /* Assert the two loops agreed on size calculations. */ + Assert(kv_size == ck_size); + + return pairs; +} + +extern kv_iter new_kv_iter(const kv_list * ns) +{ + char *name; + + /* The list may be NULL or empty. */ + if (!ns || ns->length < 1) + return (kv_iter) + { + 0, + }; + + /* Grab the number of pairs point to the first name and value. */ + name = (char *) ns->data; + return (kv_iter) + { + ns->length, name, name + strlen(name) + 1 + }; +} + +extern bool +kv_iter_next(kv_iter * iter) +{ + if (iter->togo == 0) + return false; + + /* Point to the the next name and value. */ + iter->togo--; + iter->name = iter->value + strlen(iter->value) + 1; + iter->value = iter->name + strlen(iter->name) + 1; + return true; +} + +extern bool +kv_iter_done(kv_iter * iter) +{ + return iter->togo == 0; +} diff --git a/src/option.c b/src/option.c index 8b99229..d87d5c4 100644 --- a/src/option.c +++ b/src/option.c @@ -15,6 +15,7 @@ #include "postgres.h" #include "fdw.h" +#include "kv_list.h" #include "access/reloptions.h" #include "catalog/pg_foreign_server.h" @@ -25,6 +26,7 @@ #include "nodes/makefuncs.h" #include "utils/guc.h" #include "utils/varlena.h" +#include "utils/builtins.h" static char *DEFAULT_DBNAME = "default"; @@ -66,7 +68,8 @@ static const ChFdwOption ch_options[] = /* * GUC parameters */ -char *ch_session_settings = NULL; +static char *ch_session_settings = NULL; +static kv_list * ch_session_settings_list = NULL; /* * Helper functions @@ -469,35 +472,66 @@ chfdw_parse_options(const char *options_string, bool with_comma, bool with_equal /* * Now that we have the name and the value, store the record. */ - options = lappend(options, makeDefElem(strdup(pname), (Node *) makeString(strdup(pval)), -1)); + options = lappend(options, makeDefElem(pstrdup(pname), (Node *) makeString(pstrdup(pval)), -1)); } return options; } /* - * check_settings_guc - * + * Return the current list of current session settings parsed from the + * session_settings GUC. + */ +kv_list * +chfdw_get_session_settings() +{ + return ch_session_settings_list; +} + +/* * Validates the provided settings key/value pairs. */ static bool -check_settings_guc(char **newval, void **extra, GucSource source) +chfdw_check_settings_guc(char **newval, void **extra, GucSource source) { /* * The value may be an empty string, so we have to accept that value. + * Leave extra unset; chfdw_settings_assign_hook() will assign NULL to + * ch_session_settings_list. */ if (*newval == NULL || *newval[0] == '\0') return true; - /* - * Make sure we can parse the settings. - */ - chfdw_parse_options(*newval, true, false); + /* Make sure we can parse the settings. */ + List *list = chfdw_parse_options(*newval, true, false); + + if (!list) + return false; + + /* Convert them into a guc_malloc'd kv_list. */ + kv_list *settings = new_kv_list_from_pg_list(list, kv_pair_guc_malloc); + if (!settings) + return false; + + /* All good; stash for chfdw_settings_assign_hook and return true. */ + *extra = settings; + return true; +} + +/* + * Assigns the kv_list stored in extra to the ch_session_settings_list global. + */ +static void +chfdw_settings_assign_hook(const char *newval, void *extra) +{ /* - * All good if no error. + * From PostgreSQL's POV: (a) failure here is not acceptable, and (b) it + * is not necessarily called inside a transaction, so e.g. catalog lookups + * are not okay. IOW, keep it as simple as possible, and leave error + * returning behavior to chfdw_check_settings_guc(). */ - return true; + ch_session_settings_list = (kv_list *) extra; } /* @@ -522,8 +556,8 @@ _PG_init(void) "join_use_nulls 1, group_by_use_nulls 1, final 1", PGC_USERSET, 0, - check_settings_guc, - NULL, + chfdw_check_settings_guc, + chfdw_settings_assign_hook, NULL); #if PG_VERSION_NUM >= 150000