Skip to content

Commit b17a323

Browse files
authored
remove old thread info from cpu metrics
If an application keeps starting new threads, the collected thread info can end up containing every thread that was ever started, so the resulting metrics grow very large. This patch removes info about dead threads, so the user sees only the threads that are still relevant. Closes #376
1 parent ae06711 commit b17a323

File tree

3 files changed

+84
-10
lines changed

3 files changed

+84
-10
lines changed

CHANGELOG.md

+2-1
Original file line numberDiff line numberDiff line change
@@ -9,8 +9,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
99

1010
- Float numbers in Graphite exporter
1111
- Signed timestamp in Graphite exporter
12-
- Increase `Shared.make_key` perfomance in observations with empty label
12+
- Increase `Shared.make_key` performance in observations with empty label
1313
- Forbid observation of non-number value in collectors (except `gauge:set`)
14+
- Clean dead threads from `psutils.cpu` metric
1415

1516
### Added
1617

metrics/psutils/cpu.lua

+26-9
Original file line numberDiff line numberDiff line change
@@ -12,25 +12,42 @@ local instance_file = arg[0]
1212
collectors_list.cpu_count = utils.set_gauge('cpu_count', 'The number of processors', psutils.get_cpu_count())
1313
collectors_list.cpu_number = utils.set_gauge('cpu_number', 'The number of processors', psutils.get_cpu_count())
1414

15+
local threads = {}
16+
1517
local function update_cpu_metrics()
1618
utils.set_gauge('cpu_total', 'Host CPU time', psutils.get_cpu_time())
1719
utils.set_gauge('cpu_time', 'Host CPU time', psutils.get_cpu_time())
1820

21+
local new_threads = {}
1922
for _, thread_info in ipairs(psutils.get_process_cpu_time()) do
20-
collectors_list.cpu_thread = utils.set_gauge('cpu_thread', 'Tarantool thread cpu time', thread_info.utime, {
21-
kind = 'user',
23+
local labels = {
2224
thread_name = thread_info.comm,
2325
thread_pid = thread_info.pid,
2426
file_name = instance_file,
25-
})
27+
}
2628

27-
collectors_list.cpu_thread = utils.set_gauge('cpu_thread', 'Tarantool thread cpu time', thread_info.stime, {
28-
kind = 'system',
29-
thread_name = thread_info.comm,
30-
thread_pid = thread_info.pid,
31-
file_name = instance_file,
32-
})
29+
local utime_labels = table.copy(labels)
30+
utime_labels.kind = 'user'
31+
collectors_list.cpu_thread = utils.set_gauge('cpu_thread', 'Tarantool thread cpu time',
32+
thread_info.utime, utime_labels)
33+
34+
local stime_labels = table.copy(labels)
35+
stime_labels.kind = 'system' -- stime is system-mode CPU time; 'user' would reuse the utime label set above
36+
collectors_list.cpu_thread = utils.set_gauge('cpu_thread', 'Tarantool thread cpu time',
37+
thread_info.stime, stime_labels)
38+
39+
threads[thread_info.pid] = nil
40+
new_threads[thread_info.pid] = labels
41+
end
42+
43+
for _, thread_info in pairs(threads) do
44+
thread_info.kind = 'user'
45+
collectors_list.cpu_thread:remove(thread_info)
46+
47+
thread_info.kind = 'system'
48+
collectors_list.cpu_thread:remove(thread_info)
3349
end
50+
threads = new_threads
3451
end
3552

3653
return {
+56
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
#!/usr/bin/env tarantool
2+
3+
local t = require('luatest')
4+
local g = t.group('psutils_linux_clean_info')
5+
local utils = require('test.utils')
6+
local metrics = require('metrics')
7+
local fiber = require('fiber')
8+
local fio = require('fio')
9+
local psutils_linux = require('metrics.psutils.psutils_linux')
10+
local cpu = require('metrics.psutils.cpu')
11+
12+
g.before_all(function()
13+
t.skip_if(jit.os ~= 'Linux', 'Linux is the only supported platform')
14+
utils.init()
15+
end)
16+
17+
g.after_each(function()
18+
metrics.clear()
19+
end)
20+
21+
g.test_clean_thread_info = function()
22+
box.cfg{worker_pool_threads = 100}
23+
24+
for _ = 1, 1000 do
25+
fiber.new(function() fio.stat(arg[-1]) end)
26+
end
27+
fiber.sleep(0.1)
28+
29+
cpu.update()
30+
local list1 = psutils_linux.get_process_cpu_time()
31+
local observations1 = metrics.collect()
32+
local coio_count1 = 0
33+
for _, thread_info in ipairs(list1) do
34+
if thread_info.comm == 'coio' then
35+
coio_count1 = coio_count1 + 1
36+
end
37+
end
38+
39+
box.cfg{worker_pool_threads = 1}
40+
fiber.sleep(0.1)
41+
42+
cpu.update()
43+
local list2 = psutils_linux.get_process_cpu_time()
44+
local observations2 = metrics.collect()
45+
local coio_count2 = 0
46+
for _, thread_info in ipairs(list2) do
47+
if thread_info.comm == 'coio' then
48+
coio_count2 = coio_count2 + 1
49+
end
50+
end
51+
52+
t.assert_gt(#list1, #list2)
53+
t.assert_gt(#observations1, #observations2)
54+
t.assert_gt(coio_count1, coio_count2)
55+
t.assert_equals(#observations1 - #observations2, 2 * (coio_count1 - coio_count2)) -- two observations per thread: kind 'user' and kind 'system'
56+
end

0 commit comments

Comments
 (0)