Skip to content

Commit d986963

Browse files
committed
Switch perf attachment state to lazy chunked table
Signed-off-by: Cong Wang <cwang@multikernel.io>
1 parent f032a0a commit d986963

1 file changed

Lines changed: 58 additions & 57 deletions

File tree

src/userspace_codegen.ml

Lines changed: 58 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -4139,61 +4139,69 @@ typedef struct PerfAttachment {
41394139
_Atomic unsigned int readers;
41404140
};
41414141

4142+
/* Lazy chunked perf_attachment_state lookup table.
4143+
* Top-level is a fixed array of chunk pointers; chunks are malloc'd on demand
4144+
* the first time a perf_fd in that range is attached, and never freed.
4145+
* Chunks never move once allocated, so reader pointers into a slot stay valid
4146+
* for the lifetime of the process without any resize/quiescence handshake.
4147+
* The fd-space ceiling is CHUNK_SIZE * MAX_CHUNKS, which covers any plausible
4148+
* RLIMIT_NOFILE on Linux (the default fs.nr_open of 1048576 is well under this, though an administrator can raise it). */
4149+
#define KS_PERF_STATE_CHUNK_BITS 10u
4150+
#define KS_PERF_STATE_CHUNK_SIZE (1u << KS_PERF_STATE_CHUNK_BITS)
4151+
#define KS_PERF_STATE_CHUNK_MASK (KS_PERF_STATE_CHUNK_SIZE - 1u)
4152+
#define KS_PERF_STATE_MAX_CHUNKS 4096u
4153+
41424154
static struct attachment_entry *attached_programs = NULL;
4143-
static _Atomic(struct perf_attachment_state *) perf_attachment_states = NULL;
4144-
static _Atomic size_t perf_attachment_state_capacity = 0;
4155+
static _Atomic(struct perf_attachment_state *) perf_state_chunks[KS_PERF_STATE_MAX_CHUNKS];
41454156
static pthread_mutex_t attachment_mutex = PTHREAD_MUTEX_INITIALIZER;
41464157
static int next_attachment_id = 1;
41474158
static uint64_t next_perf_attachment_generation = 1;
41484159

4149-
static int ensure_perf_attachment_state_capacity_locked(int perf_fd) {
4160+
static struct perf_attachment_state *perf_state_slot_lookup(int perf_fd) {
41504161
if (perf_fd < 0) {
4151-
return -1;
4162+
return NULL;
41524163
}
4153-
4154-
size_t capacity = atomic_load_explicit(&perf_attachment_state_capacity, memory_order_acquire);
4155-
if ((size_t)perf_fd < capacity) {
4156-
return 0;
4164+
size_t chunk_idx = (size_t)perf_fd >> KS_PERF_STATE_CHUNK_BITS;
4165+
if (chunk_idx >= KS_PERF_STATE_MAX_CHUNKS) {
4166+
return NULL;
41574167
}
4158-
4159-
if (capacity > 0) {
4160-
fprintf(stderr, "perf fd %d exceeds perf attachment state table capacity %zu\n",
4161-
perf_fd, capacity);
4162-
return -1;
4168+
struct perf_attachment_state *chunk =
4169+
atomic_load_explicit(&perf_state_chunks[chunk_idx], memory_order_acquire);
4170+
if (!chunk) {
4171+
return NULL;
41634172
}
4173+
return &chunk[(size_t)perf_fd & KS_PERF_STATE_CHUNK_MASK];
4174+
}
41644175

4165-
struct rlimit limit;
4166-
capacity = 1024;
4167-
if (getrlimit(RLIMIT_NOFILE, &limit) == 0 &&
4168-
limit.rlim_cur != RLIM_INFINITY &&
4169-
limit.rlim_cur > 0) {
4170-
capacity = (size_t)limit.rlim_cur;
4171-
} else {
4172-
long open_max = sysconf(_SC_OPEN_MAX);
4173-
if (open_max > 0) {
4174-
capacity = (size_t)open_max;
4175-
}
4176-
}
4177-
if ((size_t)perf_fd >= capacity) {
4178-
capacity = (size_t)perf_fd + 1;
4176+
/* Caller must hold attachment_mutex. Allocates the chunk containing perf_fd's
4177+
* slot if not yet present, and returns a pointer to the slot, or NULL if perf_fd is negative or out of range or the chunk allocation fails. */
4178+
static struct perf_attachment_state *ensure_perf_attachment_state_locked(int perf_fd) {
4179+
if (perf_fd < 0) {
4180+
return NULL;
41794181
}
4180-
4181-
struct perf_attachment_state *states =
4182-
malloc(capacity * sizeof(struct perf_attachment_state));
4183-
if (!states) {
4184-
fprintf(stderr, "Failed to allocate perf attachment state table\n");
4185-
return -1;
4182+
size_t chunk_idx = (size_t)perf_fd >> KS_PERF_STATE_CHUNK_BITS;
4183+
if (chunk_idx >= KS_PERF_STATE_MAX_CHUNKS) {
4184+
fprintf(stderr,
4185+
"perf fd %d exceeds supported perf attachment range (max %u)\n",
4186+
perf_fd, KS_PERF_STATE_MAX_CHUNKS * KS_PERF_STATE_CHUNK_SIZE);
4187+
return NULL;
41864188
}
4187-
4188-
for (size_t i = 0; i < capacity; i++) {
4189-
atomic_init(&states[i].generation, 0);
4190-
atomic_init(&states[i].perf_fd, -1);
4191-
atomic_init(&states[i].readers, 0);
4189+
struct perf_attachment_state *chunk =
4190+
atomic_load_explicit(&perf_state_chunks[chunk_idx], memory_order_acquire);
4191+
if (!chunk) {
4192+
chunk = malloc(KS_PERF_STATE_CHUNK_SIZE * sizeof(*chunk));
4193+
if (!chunk) {
4194+
fprintf(stderr, "Failed to allocate perf attachment state chunk\n");
4195+
return NULL;
4196+
}
4197+
for (size_t i = 0; i < KS_PERF_STATE_CHUNK_SIZE; i++) {
4198+
atomic_init(&chunk[i].generation, 0);
4199+
atomic_init(&chunk[i].perf_fd, -1);
4200+
atomic_init(&chunk[i].readers, 0);
4201+
}
4202+
atomic_store_explicit(&perf_state_chunks[chunk_idx], chunk, memory_order_release);
41924203
}
4193-
4194-
atomic_store_explicit(&perf_attachment_states, states, memory_order_release);
4195-
atomic_store_explicit(&perf_attachment_state_capacity, capacity, memory_order_release);
4196-
return 0;
4204+
return &chunk[(size_t)perf_fd & KS_PERF_STATE_CHUNK_MASK];
41974205
}
41984206
41994207
static void invalidate_perf_attachment_state_locked(struct attachment_entry *entry) {
@@ -4204,11 +4212,8 @@ typedef struct PerfAttachment {
42044212
return;
42054213
}
42064214
4207-
size_t capacity = atomic_load_explicit(&perf_attachment_state_capacity, memory_order_acquire);
4208-
struct perf_attachment_state *states =
4209-
atomic_load_explicit(&perf_attachment_states, memory_order_acquire);
4210-
if ((size_t)entry->perf_fd < capacity && states) {
4211-
struct perf_attachment_state *state = &states[entry->perf_fd];
4215+
struct perf_attachment_state *state = perf_state_slot_lookup(entry->perf_fd);
4216+
if (state) {
42124217
atomic_store_explicit(&state->perf_fd, -1, memory_order_release);
42134218
atomic_store_explicit(&state->generation, 0, memory_order_release);
42144219
while (atomic_load_explicit(&state->readers, memory_order_acquire) != 0) {
@@ -4223,14 +4228,11 @@ typedef struct PerfAttachment {
42234228
return NULL;
42244229
}
42254230
4226-
size_t capacity = atomic_load_explicit(&perf_attachment_state_capacity, memory_order_acquire);
4227-
struct perf_attachment_state *states =
4228-
atomic_load_explicit(&perf_attachment_states, memory_order_acquire);
4229-
if (!states || (size_t)attachment.perf_fd >= capacity) {
4231+
struct perf_attachment_state *state = perf_state_slot_lookup(attachment.perf_fd);
4232+
if (!state) {
42304233
return NULL;
42314234
}
42324235
4233-
struct perf_attachment_state *state = &states[attachment.perf_fd];
42344236
uint64_t generation =
42354237
atomic_load_explicit(&state->generation, memory_order_acquire);
42364238
int perf_fd =
@@ -4295,7 +4297,8 @@ typedef struct PerfAttachment {
42954297
}
42964298
entry->attachment_id = next_attachment_id++;
42974299
if (type == BPF_PROG_TYPE_PERF_EVENT && perf_fd >= 0) {
4298-
if (ensure_perf_attachment_state_capacity_locked(perf_fd) != 0) {
4300+
struct perf_attachment_state *state = ensure_perf_attachment_state_locked(perf_fd);
4301+
if (!state) {
42994302
pthread_mutex_unlock(&attachment_mutex);
43004303
free(entry);
43014304
return -1;
@@ -4304,10 +4307,8 @@ typedef struct PerfAttachment {
43044307
if (next_perf_attachment_generation == 0) {
43054308
next_perf_attachment_generation = 1;
43064309
}
4307-
struct perf_attachment_state *states =
4308-
atomic_load_explicit(&perf_attachment_states, memory_order_acquire);
4309-
atomic_store_explicit(&states[perf_fd].perf_fd, perf_fd, memory_order_release);
4310-
atomic_store_explicit(&states[perf_fd].generation, entry->generation, memory_order_release);
4310+
atomic_store_explicit(&state->perf_fd, perf_fd, memory_order_release);
4311+
atomic_store_explicit(&state->generation, entry->generation, memory_order_release);
43114312
}
43124313
entry->next = attached_programs;
43134314
attached_programs = entry;

0 commit comments

Comments
 (0)