Skip to content

Commit

Permalink
Elkeid v1.9: excessive memory usage on Rocky 8 (kernel v4.18)
Browse files Browse the repository at this point in the history
oncall: https://oncall.bytedance.net/chats/user/userCase?cross_oncall_flow_id=46075810&picked_detail=46075810

stack trace:
@[
    __kmalloc+376
    __kmalloc+376
    smith_init_ent+26
    tt_rb_insert_key_nolock+30
    smith_insert_ent+152
    security_inode_create_pre_handler+4174
    kprobe_ftrace_handler+144
    ftrace_ops_assist_func+110
    intel_nhlt_get_dmic_geo+286056
    security_inode_create+5
    path_openat+3372
    do_filp_open+147
    do_sys_open+388
    do_syscall_64+91
    entry_SYSCALL_64_after_hwframe+101
]: 1440

影响范围:内核早于4.19的火山云环境 (1.9版本的驱动,支持文件落盘扫描功能),CentOS 6/7/8均在此范围之内。

产生机制:4.19之前的内核不支持FMODE_CREATED通知功能,HIDS驱动只能自行缓存并管理新文件的创建信息。当程序大量创建新文件并长时间占用(不关闭)时,由于没有文件关闭事件,基于LRU的回收控制机制不会被触发,缓存的内存得不到释放,内存使用量因此持续累加;而正常程序会主动关闭文件句柄,所以不会触发此问题。

修复方案:针对大量创建文件并保持占用的情况,强制启用LRU回收。该方案已测试通过,发布前还需要进一步强化测试及验证。临时解决办法:先下线HIDS驱动,等新版本发布后再次拉起即可,不需要系统重启;另外,对于新文件创建量不大的系统,卸载并重新加载驱动即可缓解内存占用的持续增加。

问题分析与复盘: https://bytedance.larkoffice.com/docx/ZYsid6QOzo93fDx5evncgXLInHd

Signed-off-by: shenping.matt <[email protected]>
  • Loading branch information
shenping-bd committed Mar 1, 2024
1 parent 6185a45 commit d5a48c3
Showing 1 changed file with 28 additions and 37 deletions.
65 changes: 28 additions & 37 deletions driver/LKM/src/smith_hook.c
Original file line number Diff line number Diff line change
Expand Up @@ -4019,8 +4019,8 @@ void smith_enum_img(void)
static struct tt_rb g_rb_ent; /* rbtree of cached ents */
static LIST_HEAD(g_lru_ent); /* lru list of cached ents */

#define SMITH_ENT_REAPER (600) /* 10 minutes */
#define SMITH_ENT_MAX (2048) /* max cached imgs */
#define SMITH_ENT_REAPER (60) /* 60 seconds */
#define SMITH_ENT_MAX (1UL << 16) /* max pathes to be cached */

static int smith_build_ent(struct smith_ent *ent, struct smith_ent *obj)
{
Expand Down Expand Up @@ -4093,7 +4093,7 @@ static void smith_release_ent(struct tt_rb *rb, struct tt_node *tnod)
* support routines for entry cache
*/

static int smith_drop_head_ent(void)
static int smith_drop_head_ent(int count)
{
struct list_head *link;
struct smith_ent *ent;
Expand All @@ -4105,18 +4105,14 @@ static int smith_drop_head_ent(void)
if (list_empty(&g_lru_ent))
goto errorout;

if (0 == atomic_read(&ent->se_node.refs)) {
if (smith_get_seconds() > ent->se_age) {
list_del_init(&ent->se_link);
/* this entry hasn't been touched for seconds */
/* so remove the ent from rbtree and drop it */
tt_rb_remove_node_nolock(&g_rb_ent, &ent->se_node);
rc++;
}
} else {
if (smith_get_seconds() > ent->se_age || count > SMITH_ENT_MAX) {
list_del_init(&ent->se_link);
/* smith_put_ent will put it back to lru list */
/* this entry hasn't been touched for seconds */
/* so remove the ent from rbtree and drop it */
tt_rb_remove_node_nolock(&g_rb_ent, &ent->se_node);
rc++;
}

errorout:
write_unlock(&g_rb_ent.lock);

Expand All @@ -4127,10 +4123,10 @@ static void smith_drop_head_ents(struct tt_rb *rb)
{
int count = atomic_read(&rb->count);

do {
if (!smith_drop_head_ent())
while (--count > SMITH_ENT_MAX) {
if (!smith_drop_head_ent(count))
break;
} while (--count > SMITH_ENT_MAX);
}
}

static void smith_prepare_ent(char *path, struct smith_ent *ent)
Expand All @@ -4143,30 +4139,35 @@ static void smith_prepare_ent(char *path, struct smith_ent *ent)

int smith_insert_ent(char *path)
{
struct smith_ent obj;
struct smith_ent obj, *ent;
struct tt_node *tnod = NULL;


/* init obj */
smith_prepare_ent(path, &obj);

/* check whether the entry was already inserted ? */
read_lock(&g_rb_ent.lock);
tnod = tt_rb_lookup_nolock(&g_rb_ent, &obj);
if (tnod) {
atomic_inc(&tnod->refs);
read_unlock(&g_rb_ent.lock);
read_unlock(&g_rb_ent.lock);
if (tnod)
goto out;
} else {
read_unlock(&g_rb_ent.lock);
}

/* insert new node to rbtree */
write_lock(&g_rb_ent.lock);
tnod = tt_rb_insert_key_nolock(&g_rb_ent, &obj.se_node);
if (tnod)
atomic_inc(&tnod->refs);
if (tnod) {
ent = container_of(tnod, struct smith_ent, se_node);
/* remove ent from LRU if it's already LRUed */
list_del_init(&ent->se_link);
ent->se_age = smith_get_seconds() + SMITH_ENT_REAPER;
/* insert ent to the tail of LRU list */
list_add_tail(&ent->se_link, &g_lru_ent);
}
write_unlock(&g_rb_ent.lock);

smith_drop_head_ents(&g_rb_ent);

out:
return (!!tnod);
}
Expand All @@ -4182,31 +4183,21 @@ int smith_remove_ent(char *path)
/* check whether the entry was already inserted ? */
read_lock(&g_rb_ent.lock);
tnod = tt_rb_lookup_nolock(&g_rb_ent, &obj);
if (tnod) {
ent = container_of(tnod, struct smith_ent, se_node);
if (ent->se_tgid != current->tgid)
tnod = NULL;
}
read_unlock(&g_rb_ent.lock);
if (!tnod)
goto out;

write_lock(&g_rb_ent.lock);
/* do 2nd search to assure it's in lru list */
tnod = tt_rb_lookup_nolock(&g_rb_ent, &obj);
if (tnod) {
ent = container_of(tnod, struct smith_ent, se_node);
list_del_init(&ent->se_link);
if (0 == atomic_dec_return(&ent->se_node.refs)) {
tt_rb_remove_node_nolock(&g_rb_ent, tnod);
} else {
ent->se_age = smith_get_seconds() + SMITH_ENT_REAPER;
list_add_tail(&ent->se_link, &g_lru_ent);
}
tt_rb_remove_node_nolock(&g_rb_ent, tnod);
}
write_unlock(&g_rb_ent.lock);

out:
smith_drop_head_ents(&g_rb_ent);
return (!!tnod);
}

Expand Down

0 comments on commit d5a48c3

Please sign in to comment.