Skip to content

Commit 19fb64f

Browse files
metrics: introduce config alerts gauge
Since luatest does not yet fully support Tarantool 3 config instances (only treegen support is in master), some test helpers have been borrowed from tarantool/crud [1]. 1. https://github.com/tarantool/crud/blob/98b120ef7095fa34525ef9d335a1458a2edf0cca/test/tarantool3_helpers Part of tarantool/grafana-dashboard#224
1 parent 4e15db9 commit 19fb64f

File tree

11 files changed

+653
-0
lines changed

11 files changed

+653
-0
lines changed

.luacheckrc

+1
Original file line numberDiff line numberDiff line change
@@ -2,3 +2,4 @@ include_files = {"**/*.lua", "*.rockspec", "*.luacheckrc"}
22
exclude_files = {"lua_modules/", ".luarocks/", ".rocks/", "tmp/", ".history/"}
33

44
max_line_length = 120
5+
max_comment_line_length = 200

CHANGELOG.md

+3
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,9 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
55
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
66

77
## [Unreleased]
8+
### Added
9+
- New Tarantool 3 metrics:
10+
- tnt_config_alerts
811

912
## [1.1.0] - 2024-05-17
1013
### Added

doc/monitoring/api_reference.rst

+1
Original file line numberDiff line numberDiff line change
@@ -566,6 +566,7 @@ Metrics functions
566566
* ``cartridge_failover``
567567
* ``clock``
568568
* ``event_loop``
569+
* ``config``
569570

570571
See :ref:`metrics reference <metrics-reference>` for details.
571572
All metric collectors from the collection have ``metainfo.default = true``.

doc/monitoring/metrics_reference.rst

+18
Original file line numberDiff line numberDiff line change
@@ -993,3 +993,21 @@ Read view statistics
993993

994994
* - ``tnt_memtx_index_read_view``
995995
- Memory (in bytes) held for read views.
996+
997+
998+
Tarantool configuration
999+
-----------------------
1000+
1001+
These metrics are available starting from Tarantool 3.0.
1002+
1003+
.. container:: table
1004+
1005+
.. list-table::
1006+
:widths: 25 75
1007+
:header-rows: 0
1008+
1009+
* - ``tnt_config_alerts``
1010+
- Number of :ref:`configuration apply alerts <config_api_reference_info>` currently raised on the instance.
1011+
``{level="warn"}`` label covers warnings and
1012+
``{level="error"}`` covers errors.
1013+

metrics/tarantool.lua

+1
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@ local default_metrics = {
2323
cartridge_failover = require('metrics.cartridge.failover'),
2424
clock = require('metrics.tarantool.clock'),
2525
event_loop = require('metrics.tarantool.event_loop'),
26+
config = require('metrics.tarantool.config'),
2627
}
2728

2829
local all_metrics_map = {}

metrics/tarantool/config.lua

+46
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,46 @@
1+
local utils = require('metrics.utils')
2+
3+
local collectors_list = {}
4+
5+
--- Count configuration alerts by severity level.
--
-- The counters correspond to the alert types referenced here:
-- https://github.com/tarantool/tarantool/blob/319357d5973d15d08b8eda6a230eada08b710802/src/box/lua/config/utils/aboard.lua#L17-L18
--
-- @param config_info table whose `alerts` field is a collection of alerts;
--   each alert's `type` field selects the counter to increment.
-- @return table `{warn = <number>, error = <number>}`.
local function get_config_alerts(config_info)
    local config_alerts = {
        warn = 0,
        error = 0,
    }

    -- A missing alerts list is treated the same as an empty one.
    for _, alert in pairs(config_info.alerts or {}) do
        local count = config_alerts[alert.type]
        -- Alerts whose type has no counter above are skipped: incrementing a
        -- missing counter would raise "attempt to perform arithmetic on a
        -- nil value" and abort the whole metrics update. Skipping also keeps
        -- the set of returned levels fixed.
        if count ~= nil then
            config_alerts[alert.type] = count + 1
        end
    end

    return config_alerts
end
18+
19+
--- Refresh the `config_alerts` gauge, one observation per alert level.
--
-- Does nothing unless `utils.is_tarantool3()` returns true.
local function update()
    if utils.is_tarantool3() then
        -- The `config` module is required only on this path.
        -- Can migrate to box.info().config later
        -- https://github.com/tarantool/tarantool/commit/a1544d3bbc029c6fb2a148e580afe2b20e269b8d
        local alerts_by_level = get_config_alerts(require('config'):info())

        for level, count in pairs(alerts_by_level) do
            collectors_list.config_alerts = utils.set_gauge(
                'config_alerts',
                'Tarantool 3 configuration alerts',
                count,
                {level = level},
                nil,
                {default = true}
            )
        end
    end
end
42+
43+
-- Module interface: the update callback and the table of collectors it fills.
return {
    list = collectors_list,
    update = update,
}

metrics/utils.lua

+16
Original file line numberDiff line numberDiff line change
@@ -36,4 +36,20 @@ function utils.delete_collectors(list)
3636
table.clear(list)
3737
end
3838

39+
--- Extract the numeric version components from the `_TARANTOOL` global.
--
-- The version string is split on '-' and the first part is then split on '.'.
--
-- @return major, minor, patch: `tonumber` of the first, second and third
--   dot-separated component (nil for a component that is absent or not numeric).
local function get_tarantool_version()
    local full_version = rawget(_G, '_TARANTOOL')
    local release = full_version:split('-', 3)[1]
    local numbers = release:split('.', 2)

    return tonumber(numbers[1]), tonumber(numbers[2]), tonumber(numbers[3])
end
49+
50+
--- Tell whether the running Tarantool reports major version 3.
-- @return boolean
function utils.is_tarantool3()
    -- The parentheses keep only the first returned value (the major version).
    return (get_tarantool_version()) == 3
end
54+
3955
return utils
+179
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,179 @@
1+
local t = require('luatest')
2+
local g = t.group()
3+
4+
local fio = require('fio')
5+
local yaml = require('yaml')
6+
7+
local utils = require('test.utils')
8+
local treegen = require('test.tarantool3_helpers.treegen')
9+
local server_helper = require('test.tarantool3_helpers.server')
10+
11+
-- Group-wide fixture: one treegen state table, created before the first test
-- and cleaned after the last one.
g.before_all(function(cg)
    local state = {}
    cg.treegen = state
    treegen.init(state)
end)

g.after_all(function(cg)
    local state = cg.treegen
    treegen.clean(state)
end)
19+
20+
21+
-- Baseline configuration for a single instance 'server-001-a'; the test cases
-- deep-copy it and alter the copy before reloading.
local default_config = {
    credentials = {
        users = {
            guest = {roles = {'super'}},
            replicator = {password = 'replicating', roles = {'replication'}},
        },
    },
    iproto = {advertise = {peer = {login = 'replicator'}}},
    groups = {
        servers = {
            replicasets = {
                ['server-001'] = {
                    leader = 'server-001-a',
                    instances = {
                        ['server-001-a'] = {
                            iproto = {listen = {{uri = 'localhost:3301'}}},
                        },
                    },
                },
            },
        },
    },
    replication = {failover = 'manual'},
    metrics = {include = {'all'}},
}
63+
64+
--- Encode `config` as YAML and write it as `config.yaml` into `cg.server_dir`.
-- @return the result of `treegen.write_script` (callers use it as the config file path).
local function write_config(cg, config)
    local yaml_text = yaml.encode(config)
    return treegen.write_script(cg.server_dir, 'config.yaml', yaml_text)
end
67+
68+
--- Start the 'server-001-a' instance from `default_config`.
--
-- Skips the test when Tarantool 3 config is not supported. On success stores
-- the working directory in `cg.server_dir` and the server in `cg.server`.
local function start_server(cg)
    local supported = utils.is_tarantool_3_config_supported()
    t.skip_if(not supported, 'Skip since Tarantool 3 config is unsupported')

    cg.server_dir = treegen.prepare_directory(cg.treegen, {}, {})

    cg.server = server_helper:new({
        alias = 'server-001-a',
        config_file = write_config(cg, default_config),
        chdir = cg.server_dir,
    })
    cg.server:start({wait_until_ready = true})
end
82+
83+
--- Stop the server and remove its directory, whichever of them exists.
--
-- Safe to call when `start_server` did not get far enough to set
-- `cg.server` or `cg.server_dir`.
local function stop_server(cg)
    local server = cg.server
    if server ~= nil then
        server:stop()
        cg.server = nil
    end

    local dir = cg.server_dir
    if dir ~= nil then
        fio.rmtree(dir)
        cg.server_dir = nil
    end
end
94+
95+
--- Overwrite the config file with `config` and ask the server to reload it.
--
-- Any error raised during the reload is discarded on the server side.
-- NOTE(review): presumably deliberate, since some tests reload a broken
-- config on purpose — confirm.
local function reload_config(cg, config)
    write_config(cg, config)

    -- Runs on the server; must not capture anything from this scope.
    local function try_reload()
        pcall(function()
            local config_module = require('config')
            config_module:reload()
        end)
    end

    cg.server:exec(try_reload)
end
103+
104+
--- Collect metrics on `server` and check the `tnt_config_alerts` values.
--
-- @param server server object to run the collection on.
-- @param expected_values table with the expected `warn` and `error` counts.
local function assert_config_alerts_metrics(server, expected_values)
    local observations = server:exec(function()
        local metrics = require('metrics')
        metrics.invoke_callbacks()
        return metrics.collect()
    end)

    -- Warnings are checked first, then errors.
    for _, level in ipairs({'warn', 'error'}) do
        local observation = utils.find_obs(
            'tnt_config_alerts',
            {level = level, alias = 'server-001-a'},
            observations
        )
        t.assert_equals(observation.value, expected_values[level])
    end
end
125+
126+
127+
g.before_test('test_config_alerts_if_healthy', start_server)
g.after_test('test_config_alerts_if_healthy', stop_server)

-- An unmodified default config is expected to report no alerts at all.
function g.test_config_alerts_if_healthy(cg)
    local expected = {warn = 0, error = 0}
    assert_config_alerts_metrics(cg.server, expected)
end
133+
134+
135+
g.before_test('test_config_alerts_if_minor_trouble', start_server)
g.after_test('test_config_alerts_if_minor_trouble', stop_server)

-- Adds a user with role 'role_two', which the config does not define;
-- the test expects exactly one warning after reload.
function g.test_config_alerts_if_minor_trouble(cg)
    local config = table.deepcopy(default_config)
    config.credentials.users.user_one = {roles = {'role_two'}}
    reload_config(cg, config)

    local expected = {warn = 1, error = 0}
    assert_config_alerts_metrics(cg.server, expected)
end
145+
146+
147+
g.before_test('test_config_alerts_if_critical_failure', start_server)
g.after_test('test_config_alerts_if_critical_failure', stop_server)

-- Empties the 'servers' group so the instance is no longer described by the
-- config; the test expects exactly one error after reload.
function g.test_config_alerts_if_critical_failure(cg)
    local config = table.deepcopy(default_config)
    config.groups.servers = {}
    reload_config(cg, config)

    local expected = {warn = 0, error = 1}
    assert_config_alerts_metrics(cg.server, expected)
end
157+
158+
159+
g.before_test('test_config_alerts_if_unsupported', function(cg)
    local supported = utils.is_tarantool_3_config_supported()
    t.skip_if(supported, 'Skip since Tarantool 3 config is supported')
    utils.create_server(cg)
end)

g.after_test('test_config_alerts_if_unsupported', function(cg)
    utils.drop_server(cg)
    cg.server = nil
end)

-- Without Tarantool 3 config support the metric must not be present at all.
function g.test_config_alerts_if_unsupported(cg)
    -- Runs on the server; must not capture anything from this scope.
    local function collect_metrics()
        local metrics = require('metrics')
        metrics.invoke_callbacks()
        return metrics.collect()
    end

    local observations = cg.server:exec(collect_metrics)
    local alerts = utils.find_metric('tnt_config_alerts', observations)
    t.assert_equals(alerts, nil)
end

0 commit comments

Comments
 (0)