Skip to content

Commit cdb92d3

Browse files
yngvar-antonssonopomucDifferentialOrange
authored
summary: age buckets added (#193)
Co-authored-by: Roman Proskin <[email protected]> Co-authored-by: Georgy Moiseev <[email protected]>
1 parent c37ae73 commit cdb92d3

12 files changed

+338
-49
lines changed

CHANGELOG.md

+3
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
1313
### Changed
1414
- `tnt_cartridge_issues` gathers only local issues [#211](https://github.com/tarantool/metrics/issues/211)
1515

16+
### Added
17+
- Age buckets in `summary`
18+
1619
## [0.8.0] - 2021-04-13
1720
### Added
1821
- New default metrics: cpu_user_time, cpu_system_time

README.md

+7-6
Original file line numberDiff line numberDiff line change
@@ -118,10 +118,11 @@ Summary is exposed as multiple numerical values:
118118
```lua
119119
local metrics = require('metrics')
120120

121-
-- create a summary
121+
-- create a summary with a sliding window of 5 age buckets and 60s bucket lifetime
122122
local http_requests_latency = metrics.summary(
123123
'http_requests_latency', 'HTTP requests total',
124-
{[0.5]=0.01, [0.9]=0.01, [0.99]=0.01}
124+
{[0.5]=0.01, [0.9]=0.01, [0.99]=0.01},
125+
{max_age_time = 60, age_buckets_count = 5}
125126
)
126127

127128
-- somewhere in the HTTP requests middleware:
@@ -212,10 +213,10 @@ via configuration.
212213
local metrics = cartridge.service_get('metrics')
213214
```
214215

215-
5. There is an ability in Tarantool Cartridge >= '2.4.0' to set a zone for each
216-
server in cluster. If zone was set for the server 'zone' label for all metrics
217-
of this server will be added.
218-
216+
5. There is an ability in Tarantool Cartridge >= '2.4.0' to set a zone for each
217+
server in a cluster. If a zone is set for the server, the 'zone' label will be added
218+
for all metrics on this server.
219+
219220
## Next steps
220221

221222
See:

doc/locale/en/api_reference.pot

+9-3
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ msgid ""
88
msgstr ""
99
"Project-Id-Version: Monitoring \n"
1010
"Report-Msgid-Bugs-To: \n"
11-
"POT-Creation-Date: 2021-04-02 01:27+0300\n"
11+
"POT-Creation-Date: 2021-05-26 12:00+0000\n"
1212
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
1313
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
1414
"Language-Team: LANGUAGE <[email protected]>\n"
@@ -139,7 +139,10 @@ msgstr ""
139139
msgid "Registers a new summary. Quantile computation is based on the algorithm `\"Effective computation of biased quantiles over data streams\" <https://ieeexplore.ieee.org/document/1410103>`_"
140140
msgstr ""
141141

142-
msgid "Quantiles to observe in the form ``{quantile = error, ... }``. For example: ``{[0.5]=0.01, [0.9]=0.01, [0.99]=0.01}``"
142+
msgid "A list of 'targeted' φ-quantiles in the form ``{quantile = error, ... }``. For example: ``{[0.5]=0.01, [0.9]=0.01, [0.99]=0.01}``. A targeted φ-quantile is specified in the form of a φ-quantile and tolerated error. For example a ``{[0.5] = 0.1}`` means that the median (= 50th percentile) should be returned with 10 percent error. Note that percentiles and quantiles are the same concept, except percentiles are expressed as percentages. The φ-quantile must be in the interval [0, 1]. Note that a lower tolerated error for a φ-quantile results in higher usage of resources (memory and CPU) to calculate the summary."
143+
msgstr ""
144+
145+
msgid "Table of summary parameters, used for configuring sliding window of time. 'Sliding window' consists of several buckets to store observations. New observations are added to each bucket. After a time period, the 'head' bucket (bucket from which observations are collected) is reset and the next bucket becomes a new 'head'. I.e. each bucket will store observations for ``max_age_time * age_buckets_count`` seconds before it will be reset. ``max_age_time`` sets the duration of each bucket lifetime, i.e., how long observations are kept before they are discarded, in seconds ``age_buckets_count`` sets the number of buckets of the time window. It determines the number of buckets used to exclude observations that are older than ``max_age_time`` from the Summary. The value is a trade-off between resources (memory and CPU for maintaining the bucket) and how smooth the time window is moved. Default value is `{max_age_time = math.huge, age_buckets_count = 1}`"
143146
msgstr ""
144147

145148
msgid "Summary object"
@@ -163,7 +166,10 @@ msgstr ""
163166
msgid "Value to put in the data stream."
164167
msgstr ""
165168

166-
msgid "Returns a concatenation of ``counter_obj:collect()`` across all internal counters of ``summary_obj``. For ``observation`` description, see :ref:`counter_obj:collect() <counter-collect>`."
169+
msgid "A table containing label names as keys, label values as values (table). A new value is observed by all internal counters with these labels specified. Label ``\"quantile\"`` are not allowed in ``summary``. It will be added automatically. If ``max_age_time`` and ``age_buckets_count`` are set, the observed value will be added to each bucket."
170+
msgstr ""
171+
172+
msgid "Returns a concatenation of ``counter_obj:collect()`` across all internal counters of ``summary_obj``. For ``observation`` description, see :ref:`counter_obj:collect() <counter-collect>`. If ``max_age_time`` and ``age_buckets_count`` are set, quantile observations will be collect only from the head bucket in sliding window and not from every bucket."
167173
msgstr ""
168174

169175
msgid "Labels"

doc/locale/en/metrics_reference.pot

+1-1
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,7 @@ msgid ""
88
msgstr ""
99
"Project-Id-Version: Monitoring \n"
1010
"Report-Msgid-Bugs-To: \n"
11-
"POT-Creation-Date: 2021-05-27 09:04+0000\n"
11+
"POT-Creation-Date: 2021-05-28 12:41+0000\n"
1212
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
1313
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
1414
"Language-Team: LANGUAGE <[email protected]>\n"

doc/monitoring/api_reference.rst

+37-6
Original file line numberDiff line numberDiff line change
@@ -162,12 +162,35 @@ Summary
162162

163163
.. function:: summary(name [, help, objectives, params])
164164

165-
Registers a new summary. Quantile computation is based on the algorithm `"Effective computation of biased quantiles over data streams" <https://ieeexplore.ieee.org/document/1410103>`_
165+
Registers a new summary. Quantile computation is based on the algorithm
166+
`"Effective computation of biased quantiles over data streams" <https://ieeexplore.ieee.org/document/1410103>`_
166167

167168
:param string name: Collector name. Must be unique.
168169
:param string help: Help description.
169-
:param table objectives: Quantiles to observe in the form ``{quantile = error, ... }``.
170-
For example: ``{[0.5]=0.01, [0.9]=0.01, [0.99]=0.01}``
170+
:param table objectives: A list of 'targeted' φ-quantiles in the form ``{quantile = error, ... }``.
171+
For example: ``{[0.5]=0.01, [0.9]=0.01, [0.99]=0.01}``.
172+
A targeted φ-quantile is specified in the form of a φ-quantile and tolerated
173+
error. For example, ``{[0.5] = 0.1}`` means that the median (= 50th
174+
percentile) should be returned with 10 percent error. Note that
175+
percentiles and quantiles are the same concept, except percentiles are
176+
expressed as percentages. The φ-quantile must be in the interval [0, 1].
177+
Note that a lower tolerated error for a φ-quantile results in higher
178+
usage of resources (memory and CPU) to calculate the summary.
179+
180+
:param table params: Table of summary parameters, used for configuring sliding
181+
window of time. 'Sliding window' consists of several buckets to store observations.
182+
New observations are added to each bucket. After a time period, the 'head' bucket
183+
(bucket from which observations are collected) is reset and the next bucket becomes a
184+
new 'head'. I.e. each bucket will store observations for
185+
``max_age_time * age_buckets_count`` seconds before it will be reset.
186+
``max_age_time`` sets the duration of each bucket lifetime, i.e., how long
187+
observations are kept before they are discarded, in seconds.
188+
``age_buckets_count`` sets the number of buckets of the time window. It
189+
determines the number of buckets used to exclude observations that are
190+
older than ``max_age_time`` from the Summary. The value is
191+
a trade-off between resources (memory and CPU for maintaining the bucket)
192+
and how smooth the time window is moved.
193+
Default value is ``{max_age_time = math.huge, age_buckets_count = 1}``.
171194

172195
:return: Summary object
173196

@@ -190,16 +213,23 @@ Summary
190213
Records a new value in a summary.
191214

192215
:param number num: Value to put in the data stream.
193-
:param table label_pairs: Table containing label names as keys,
216+
:param table label_pairs: A table containing label names as keys,
194217
label values as values (table).
195218
A new value is observed by all internal counters
196219
with these labels specified.
220+
The ``"quantile"`` label is not allowed in ``summary``.
221+
It will be added automatically.
222+
If ``max_age_time`` and ``age_buckets_count`` are set,
223+
the observed value will be added to each bucket.
197224

198225
.. method:: collect()
199226

200227
Returns a concatenation of ``counter_obj:collect()`` across all internal
201228
counters of ``summary_obj``. For ``observation`` description,
202229
see :ref:`counter_obj:collect() <counter-collect>`.
230+
If ``max_age_time`` and ``age_buckets_count`` are set, quantile observations
231+
will be collected only from the head bucket in the sliding window and not from every
232+
bucket.
203233

204234
.. _labels:
205235

@@ -452,10 +482,11 @@ Using summaries:
452482
453483
local metrics = require('metrics')
454484
455-
-- create a summary
485+
-- create a summary with a window of 5 age buckets and 60s bucket lifetime
456486
local http_requests_latency = metrics.summary(
457487
'http_requests_latency', 'HTTP requests total',
458-
{[0.5]=0.01, [0.9]=0.01, [0.99]=0.01}
488+
{[0.5]=0.01, [0.9]=0.01, [0.99]=0.01},
489+
{max_age_time = 60, age_buckets_count = 5}
459490
)
460491
461492
-- somewhere in the HTTP requests middleware:

metrics/collectors/summary.lua

+68-13
Original file line numberDiff line numberDiff line change
@@ -6,20 +6,28 @@ local fiber = require('fiber')
66

77
local Summary = Shared:new_class('summary', {'observe_latency'})
88

9-
function Summary:new(name, help, objectives)
9+
--- Create a summary collector.
--
-- Besides the quantile streams, a summary owns two counters,
-- `<name>_count` and `<name>_sum`.
--
-- @param name        collector name
-- @param help        help string, also passed to both internal counters
-- @param objectives  optional table `{quantile = error, ...}`; when nil the
--                    summary tracks only count and sum
-- @param params      optional table with `max_age_time` and
--                    `age_buckets_count` (sliding window settings)
-- @return the new summary object
function Summary:new(name, help, objectives, params)
    params = params or {}
    local obj = Shared.new(self, name, help)

    obj.count_collector = Counter:new(name .. '_count', help)
    obj.sum_collector = Counter:new(name .. '_sum', help)
    obj.objectives = objectives
    -- Documented defaults: a single bucket that never expires. Falling back
    -- to math.huge keeps the `now - last_rotate >= max_age_time` comparison
    -- valid even when only age_buckets_count was supplied by a direct caller.
    obj.max_age_time = params.max_age_time or math.huge
    obj.age_buckets_count = params.age_buckets_count or 1
    obj.observations = {}

    -- Array of the requested quantiles, iterated when quantiles are collected.
    -- `objectives` is optional, so guard against nil: pairs(nil) raises.
    obj.quantiles = {}
    for q, _ in pairs(objectives or {}) do
        table.insert(obj.quantiles, q)
    end
    return obj
end
1826

1927
function Summary.check_quantiles(objectives)
2028
for k, v in pairs(objectives) do
2129
if type(k) ~= 'number' then return false end
22-
if k >= 1 or k < 0 then return false end
30+
if k > 1 or k < 0 then return false end
2331
if type(v) ~= 'number' then return false end
2432
end
2533
return true
@@ -31,35 +39,69 @@ function Summary:set_registry(registry)
3139
self.sum_collector:set_registry(registry)
3240
end
3341

42+
--- Advance the sliding window of one labelled series by one bucket.
--
-- The current head bucket is reset and the next bucket (cyclically, 1-based)
-- becomes the new head; the rotation timestamp is refreshed.
--
-- @param key  observation key as produced by `self.make_key(label_pairs)`
function Summary:rotate_age_buckets(key)
    local obs_object = self.observations[key]
    local old_index = obs_object.head_bucket_index
    -- 1-based cyclic successor: 1 -> 2 -> ... -> age_buckets_count -> 1.
    -- The previous formula `((i + 1) % n) + 1` stepped by two, so with an
    -- even number of buckets the head only ever visited half of them
    -- (and with n = 2 it never left bucket 1).
    obs_object.head_bucket_index = (old_index % self.age_buckets_count) + 1
    Quantile.Reset(obs_object.buckets[old_index])
    obs_object.last_rotate = os.time()
end
49+
3450
--- Record a new value in the summary.
--
-- The count and sum counters are always updated. When objectives are
-- configured, the value is also inserted into every age bucket of the series
-- identified by `label_pairs`; the series is created on first use.
--
-- @param num          value to observe
-- @param label_pairs  optional table of labels; must not contain `quantile`
function Summary:observe(num, label_pairs)
    label_pairs = label_pairs or {}
    if label_pairs.quantile then
        error('Label "quantile" are not allowed in summary')
    end
    self.count_collector:inc(1, label_pairs)
    self.sum_collector:inc(num, label_pairs)

    -- Without objectives there are no quantile streams to feed.
    if not self.objectives then
        return
    end

    local now = os.time()
    local key = self.make_key(label_pairs)
    local series = self.observations[key]

    if not series then
        -- First observation for these labels: build all buckets, each one
        -- already containing the observed value.
        series = {
            buckets = {},
            head_bucket_index = 1,
            last_rotate = now,
            label_pairs = label_pairs,
        }
        self.label_pairs[key] = label_pairs
        for i = 1, self.age_buckets_count do
            local stream = Quantile.NewTargeted(self.objectives)
            Quantile.Insert(stream, num)
            series.buckets[i] = stream
        end
        self.observations[key] = series
        return
    end

    -- Existing series: rotate the window if the head bucket has expired,
    -- then feed the value to every bucket.
    if self.age_buckets_count > 1 and now - series.last_rotate >= self.max_age_time then
        self:rotate_age_buckets(key)
    end
    for _, bucket in ipairs(series.buckets) do
        Quantile.Insert(bucket, num)
    end
end
4886

49-
5087
function Summary:collect_quantiles()
51-
if next(self.observations) == nil then
88+
if not self.objectives or next(self.observations) == nil then
5289
return {}
5390
end
91+
5492
local result = {}
93+
local now = os.time()
5594
for key, observation in pairs(self.observations) do
56-
for objective, _ in pairs(self.objectives) do
57-
local label_pairs = table.deepcopy(self:append_global_labels(self.label_pairs[key]))
95+
if self.age_buckets_count > 1 and now - observation.last_rotate >= self.max_age_time then
96+
self:rotate_age_buckets(key)
97+
end
98+
for _, objective in ipairs(self.quantiles) do
99+
local label_pairs = table.deepcopy(self:append_global_labels(observation.label_pairs))
58100
label_pairs.quantile = objective
59101
local obs = {
60102
metric_name = self.name,
61103
label_pairs = label_pairs,
62-
value = Quantile.Query(observation, objective),
104+
value = Quantile.Query(observation.buckets[observation.head_bucket_index], objective),
63105
timestamp = fiber.time64(),
64106
}
65107
table.insert(result, obs)
@@ -70,16 +112,29 @@ end
70112

71113
--- Collect all observations of the summary.
--
-- @return array with the count counter observations first, then the sum
--         counter observations, then the quantile observations
function Summary:collect()
    local result = {}

    -- Append every element of the array `items` to `result`.
    local function append(items)
        for _, obs in ipairs(items) do
            table.insert(result, obs)
        end
    end

    append(self.count_collector:collect())
    append(self.sum_collector:collect())
    append(self:collect_quantiles())
    return result
end
84126

127+
-- Debug function to inspect the quantile state kept for `label_pairs`.
-- Returns:
--   * nil if nothing has been observed for these labels yet;
--   * the whole observation record (with `buckets`, `head_bucket_index`,
--     `last_rotate`, `label_pairs`) if the summary has several age buckets;
--   * the single quantile object if the summary has only one bucket.
function Summary:get_observations(label_pairs)
    local key = self.make_key(label_pairs or {})
    local obs = self.observations[key]
    -- No series for these labels: avoid indexing nil in the one-bucket case.
    if obs == nil then
        return nil
    end
    if self.age_buckets_count > 1 then
        return obs
    end
    return obs.buckets[1]
end
139+
85140
return Summary

metrics/http_middleware.lua

+6-1
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,11 @@ export.DEFAULT_QUANTILES = {
1515
[0.99] = 0.01,
1616
}
1717

18+
-- Sliding-window parameters passed together with DEFAULT_QUANTILES when the
-- default `summary` collector is built.
export.DEFAULT_SUMMARY_PARAMS = {age_buckets_count = 5, max_age_time = 60}
22+
1823
--- Build default histogram collector
1924
--
2025
-- @string[opt='histogram'] type_name `histogram` or `average` or `summary`
@@ -29,7 +34,7 @@ function export.build_default_collector(type_name, name, help)
2934
if type_name == 'histogram' then
3035
extra = {export.DEFAULT_HISTOGRAM_BUCKETS}
3136
elseif type_name == 'summary' then
32-
extra = {export.DEFAULT_QUANTILES}
37+
extra = {export.DEFAULT_QUANTILES, export.DEFAULT_SUMMARY_PARAMS}
3338
elseif type_name == 'average' then
3439
log.warn('Average collector is deprecated. Use summary collector instead.')
3540
else

metrics/init.lua

+18-3
Original file line numberDiff line numberDiff line change
@@ -56,13 +56,28 @@ local function histogram(name, help, buckets)
5656
return registry:find_or_create(Histogram, name, help, buckets)
5757
end
5858

59-
local function summary(name, help, objectives)
60-
checks('string', '?string', '?table')
59+
--- Find or register a summary collector.
--
-- @param name        collector name
-- @param help        optional help string
-- @param objectives  optional table `{quantile = error, ...}`
-- @param params      optional table with numeric `age_buckets_count` and
--                    `max_age_time`; both must be given together
-- @return summary object from the registry
local function summary(name, help, objectives, params)
    checks('string', '?string', '?table', {
        age_buckets_count = '?number',
        max_age_time = '?number',
    })
    if objectives ~= nil and not Summary.check_quantiles(objectives) then
        error('Invalid value for objectives')
    end

    params = params or {}
    local max_age, buckets = params.max_age_time, params.age_buckets_count

    if max_age and max_age <= 0 then
        error('Max age must be positive')
    end
    if buckets and buckets < 1 then
        error('Age buckets count must be greater or equal than one')
    end
    -- Exactly one of the two options being present is an error.
    if (not max_age) ~= (not buckets) then
        error('Age buckets count and max age must be present only together')
    end

    return registry:find_or_create(Summary, name, help, objectives, params)
end
6782

6883
local function set_global_labels(label_pairs)

metrics/quantile.lua

+6
Original file line numberDiff line numberDiff line change
@@ -284,6 +284,12 @@ function quantile.Reset(stream_obj)
284284
stream_obj.stream.n = 0
285285
stream_obj.b_len = 0
286286
stream_obj.stream.l_len = 0
287+
for i = 1, stream_obj.__max_samples * 2 + 1 do
288+
stream_obj.stream.l[i] = inf_obj
289+
end
290+
for i = 0, stream_obj.__max_samples - 1 do
291+
stream_obj.b[i] = math.huge
292+
end
287293
end
288294

289295
return quantile

0 commit comments

Comments
 (0)