@@ -4139,61 +4139,69 @@ typedef struct PerfAttachment {
41394139 _Atomic unsigned int readers;
41404140 };
41414141
4142+ /* Lazy chunked perf_attachment_state lookup table.
4143+ * The top level is a fixed array of chunk pointers; chunks are malloc'd on demand
4144+ * the first time a perf_fd in that range is attached, and are never freed.
4145+ * Chunks never move once allocated, so reader pointers into a slot stay valid
4146+ * for the lifetime of the process without any resize/quiescence handshake.
4147+ * The fd-space ceiling is CHUNK_SIZE * MAX_CHUNKS (1024 * 4096 = 4194304 fds), which
4148+ * covers any plausible RLIMIT_NOFILE on Linux (the kernel's default fs.nr_open is well under this). */
4149+ #define KS_PERF_STATE_CHUNK_BITS 10 u
4150+ #define KS_PERF_STATE_CHUNK_SIZE (1 u << KS_PERF_STATE_CHUNK_BITS)
4151+ #define KS_PERF_STATE_CHUNK_MASK (KS_PERF_STATE_CHUNK_SIZE - 1 u)
4152+ #define KS_PERF_STATE_MAX_CHUNKS 4096 u
4153+
41424154 static struct attachment_entry *attached_programs = NULL ;
4143- static _Atomic(struct perf_attachment_state * ) perf_attachment_states = NULL ;
4144- static _Atomic size_t perf_attachment_state_capacity = 0 ;
4155+ static _Atomic(struct perf_attachment_state * ) perf_state_chunks[KS_PERF_STATE_MAX_CHUNKS ];
41454156 static pthread_mutex_t attachment_mutex = PTHREAD_MUTEX_INITIALIZER ;
41464157 static int next_attachment_id = 1 ;
41474158 static uint64_t next_perf_attachment_generation = 1 ;
41484159
4149- static int ensure_perf_attachment_state_capacity_locked (int perf_fd) {
4160+ static struct perf_attachment_state * perf_state_slot_lookup (int perf_fd) {
41504161 if (perf_fd < 0 ) {
4151- return - 1 ;
4162+ return NULL ;
41524163 }
4153-
4154- size_t capacity = atomic_load_explicit(& perf_attachment_state_capacity, memory_order_acquire);
4155- if ((size_t)perf_fd < capacity) {
4156- return 0 ;
4164+ size_t chunk_idx = (size_t)perf_fd >> KS_PERF_STATE_CHUNK_BITS ;
4165+ if (chunk_idx > = KS_PERF_STATE_MAX_CHUNKS ) {
4166+ return NULL ;
41574167 }
4158-
4159- if (capacity > 0 ) {
4160- fprintf(stderr, " perf fd %d exceeds perf attachment state table capacity %zu\n " ,
4161- perf_fd, capacity);
4162- return - 1 ;
4168+ struct perf_attachment_state * chunk =
4169+ atomic_load_explicit(& perf_state_chunks[chunk_idx], memory_order_acquire);
4170+ if (! chunk) {
4171+ return NULL ;
41634172 }
4173+ return & chunk[(size_t)perf_fd & KS_PERF_STATE_CHUNK_MASK ];
4174+ }
41644175
4165- struct rlimit limit;
4166- capacity = 1024 ;
4167- if (getrlimit(RLIMIT_NOFILE , & limit) == 0 &&
4168- limit.rlim_cur != RLIM_INFINITY &&
4169- limit.rlim_cur > 0 ) {
4170- capacity = (size_t)limit.rlim_cur;
4171- } else {
4172- long open_max = sysconf(_SC_OPEN_MAX);
4173- if (open_max > 0 ) {
4174- capacity = (size_t)open_max;
4175- }
4176- }
4177- if ((size_t)perf_fd > = capacity) {
4178- capacity = (size_t)perf_fd + 1 ;
4176+ /* Caller must hold attachment_mutex. Allocates the chunk containing perf_fd's
4177+ * slot if not yet present, and returns a pointer to the slot, or NULL if perf_fd is out of range or allocation fails. */
4178+ static struct perf_attachment_state *ensure_perf_attachment_state_locked (int perf_fd ) {
4179+ if (perf_fd < 0) {
4180+ return NULL;
41794181 }
4180-
4181- struct perf_attachment_state * states =
4182- malloc(capacity * sizeof( struct perf_attachment_state));
4183- if ( ! states) {
4184- fprintf(stderr, " Failed to allocate perf attachment state table \n " );
4185- return - 1 ;
4182+ size_t chunk_idx = (size_t)perf_fd >> KS_PERF_STATE_CHUNK_BITS ;
4183+ if (chunk_idx > = KS_PERF_STATE_MAX_CHUNKS ) {
4184+ fprintf(stderr,
4185+ " perf fd %d exceeds supported perf attachment range (max %u) \n " ,
4186+ perf_fd, KS_PERF_STATE_MAX_CHUNKS * KS_PERF_STATE_CHUNK_SIZE );
4187+ return NULL ;
41864188 }
4187-
4188- for (size_t i = 0 ; i < capacity; i++ ) {
4189- atomic_init(& states[i].generation, 0 );
4190- atomic_init(& states[i].perf_fd, - 1 );
4191- atomic_init(& states[i].readers, 0 );
4189+ struct perf_attachment_state * chunk =
4190+ atomic_load_explicit(& perf_state_chunks[chunk_idx], memory_order_acquire);
4191+ if (! chunk) {
4192+ chunk = malloc(KS_PERF_STATE_CHUNK_SIZE * sizeof(* chunk));
4193+ if (!chunk) {
4194+ fprintf(stderr, "Failed to allocate perf attachment state chunk\n");
4195+ return NULL;
4196+ }
4197+ for (size_t i = 0; i < KS_PERF_STATE_CHUNK_SIZE; i++) {
4198+ atomic_init(&chunk[i].generation, 0);
4199+ atomic_init(&chunk[i].perf_fd, -1);
4200+ atomic_init(&chunk[i].readers, 0);
4201+ }
4202+ atomic_store_explicit(&perf_state_chunks[chunk_idx], chunk, memory_order_release);
41924203 }
4193-
4194- atomic_store_explicit(& perf_attachment_states, states, memory_order_release);
4195- atomic_store_explicit(& perf_attachment_state_capacity, capacity, memory_order_release);
4196- return 0 ;
4204+ return &chunk[(size_t)perf_fd & KS_PERF_STATE_CHUNK_MASK];
41974205 }
41984206
41994207 static void invalidate_perf_attachment_state_locked(struct attachment_entry *entry) {
@@ -4204,11 +4212,8 @@ typedef struct PerfAttachment {
42044212 return;
42054213 }
42064214
4207- size_t capacity = atomic_load_explicit(& perf_attachment_state_capacity, memory_order_acquire);
4208- struct perf_attachment_state * states =
4209- atomic_load_explicit(& perf_attachment_states, memory_order_acquire);
4210- if ((size_t)entry->perf_fd < capacity && states) {
4211- struct perf_attachment_state * state = & states[entry->perf_fd];
4215+ struct perf_attachment_state *state = perf_state_slot_lookup(entry->perf_fd);
4216+ if (state) {
42124217 atomic_store_explicit(&state->perf_fd, -1, memory_order_release);
42134218 atomic_store_explicit(&state->generation, 0, memory_order_release);
42144219 while (atomic_load_explicit(&state->readers, memory_order_acquire) != 0) {
@@ -4223,14 +4228,11 @@ typedef struct PerfAttachment {
42234228 return NULL;
42244229 }
42254230
4226- size_t capacity = atomic_load_explicit(& perf_attachment_state_capacity, memory_order_acquire);
4227- struct perf_attachment_state * states =
4228- atomic_load_explicit(& perf_attachment_states, memory_order_acquire);
4229- if (! states || (size_t)attachment.perf_fd > = capacity) {
4231+ struct perf_attachment_state *state = perf_state_slot_lookup(attachment.perf_fd);
4232+ if (!state) {
42304233 return NULL;
42314234 }
42324235
4233- struct perf_attachment_state * state = & states[attachment.perf_fd];
42344236 uint64_t generation =
42354237 atomic_load_explicit(&state->generation, memory_order_acquire);
42364238 int perf_fd =
@@ -4295,7 +4297,8 @@ typedef struct PerfAttachment {
42954297 }
42964298 entry->attachment_id = next_attachment_id++;
42974299 if (type == BPF_PROG_TYPE_PERF_EVENT && perf_fd >= 0) {
4298- if (ensure_perf_attachment_state_capacity_locked(perf_fd) != 0 ) {
4300+ struct perf_attachment_state *state = ensure_perf_attachment_state_locked(perf_fd);
4301+ if (!state) {
42994302 pthread_mutex_unlock(&attachment_mutex);
43004303 free(entry);
43014304 return -1;
@@ -4304,10 +4307,8 @@ typedef struct PerfAttachment {
43044307 if (next_perf_attachment_generation == 0) {
43054308 next_perf_attachment_generation = 1;
43064309 }
4307- struct perf_attachment_state * states =
4308- atomic_load_explicit(& perf_attachment_states, memory_order_acquire);
4309- atomic_store_explicit(& states[perf_fd].perf_fd, perf_fd, memory_order_release);
4310- atomic_store_explicit(& states[perf_fd].generation, entry->generation, memory_order_release);
4310+ atomic_store_explicit(&state->perf_fd, perf_fd, memory_order_release);
4311+ atomic_store_explicit(&state->generation, entry->generation, memory_order_release);
43114312 }
43124313 entry->next = attached_programs;
43134314 attached_programs = entry;
0 commit comments