Skip to content

Commit b93460d

Browse files
jhg03aglinton
authored andcommitted
Split multiple sensor keys in ipmi input (influxdata#4450)
1 parent efe61ee commit b93460d

File tree

4 files changed

+361
-38
lines changed

4 files changed

+361
-38
lines changed

etc/telegraf.conf

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1976,6 +1976,9 @@
19761976
# ## Timeout for the ipmitool command to complete
19771977
# timeout = "20s"
19781978

1979+
# ## Schema Version: (Optional, defaults to version 1)
1980+
# metric_version = 2
1981+
19791982

19801983
# # Gather packets and bytes counters from Linux ipsets
19811984
# [[inputs.ipset]]

plugins/inputs/ipmi_sensor/README.md

Lines changed: 48 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,10 @@ If no servers are specified, the plugin will query the local machine sensor stat
88
```
99
ipmitool sdr
1010
```
11+
or with the version 2 schema:
12+
```
13+
ipmitool sdr elist
14+
```
1115

1216
When one or more servers are specified, the plugin will use the following command to collect remote host sensor stats:
1317

@@ -41,19 +45,36 @@ ipmitool -I lan -H SERVER -U USERID -P PASSW0RD sdr
4145

4246
## Timeout for the ipmitool command to complete. Default is 20 seconds.
4347
timeout = "20s"
48+
49+
## Schema Version: (Optional, defaults to version 1)
50+
metric_version = 2
4451
```
4552

4653
### Measurements
4754

55+
Version 1 schema:
4856
- ipmi_sensor:
4957
- tags:
5058
- name
5159
- unit
60+
- host
5261
- server (only when retrieving stats from remote servers)
5362
- fields:
54-
- status (int)
63+
- status (int, 1 if status_code is ok, 0 for anything else)
5564
- value (float)
5665

66+
Version 2 schema:
67+
- ipmi_sensor:
68+
- tags:
69+
- name
70+
- entity_id (can help uniquify duplicate names)
71+
- status_code (two letter code from IPMI documentation)
72+
- status_desc (extended status description field)
73+
- unit (only on analog values)
74+
- host
75+
- server (only when retrieving stats from remote servers)
76+
- fields:
77+
- value (float)
5778

5879
#### Permissions
5980

@@ -68,24 +89,36 @@ KERNEL=="ipmi*", MODE="660", GROUP="telegraf"
6889

6990
### Example Output
7091

92+
#### Version 1 Schema
7193
When retrieving stats from a remote server:
7294
```
73-
ipmi_sensor,server=10.20.2.203,unit=degrees_c,name=ambient_temp status=1i,value=20 1458488465012559455
74-
ipmi_sensor,server=10.20.2.203,unit=feet,name=altitude status=1i,value=80 1458488465012688613
75-
ipmi_sensor,server=10.20.2.203,unit=watts,name=avg_power status=1i,value=220 1458488465012776511
76-
ipmi_sensor,server=10.20.2.203,unit=volts,name=planar_3.3v status=1i,value=3.28 1458488465012861875
77-
ipmi_sensor,server=10.20.2.203,unit=volts,name=planar_vbat status=1i,value=3.04 1458488465013072508
78-
ipmi_sensor,server=10.20.2.203,unit=rpm,name=fan_1a_tach status=1i,value=2610 1458488465013137932
79-
ipmi_sensor,server=10.20.2.203,unit=rpm,name=fan_1b_tach status=1i,value=1775 1458488465013279896
95+
ipmi_sensor,server=10.20.2.203,name=uid_light value=0,status=1i 1517125513000000000
96+
ipmi_sensor,server=10.20.2.203,name=sys._health_led status=1i,value=0 1517125513000000000
97+
ipmi_sensor,server=10.20.2.203,name=power_supply_1,unit=watts status=1i,value=110 1517125513000000000
98+
ipmi_sensor,server=10.20.2.203,name=power_supply_2,unit=watts status=1i,value=120 1517125513000000000
99+
ipmi_sensor,server=10.20.2.203,name=power_supplies value=0,status=1i 1517125513000000000
100+
ipmi_sensor,server=10.20.2.203,name=fan_1,unit=percent status=1i,value=43.12 1517125513000000000
80101
```
81102

103+
104+
When retrieving stats from the local machine (no server specified):
105+
```
106+
ipmi_sensor,name=uid_light value=0,status=1i 1517125513000000000
107+
ipmi_sensor,name=sys._health_led status=1i,value=0 1517125513000000000
108+
ipmi_sensor,name=power_supply_1,unit=watts status=1i,value=110 1517125513000000000
109+
ipmi_sensor,name=power_supply_2,unit=watts status=1i,value=120 1517125513000000000
110+
ipmi_sensor,name=power_supplies value=0,status=1i 1517125513000000000
111+
ipmi_sensor,name=fan_1,unit=percent status=1i,value=43.12 1517125513000000000
112+
```
113+
114+
#### Version 2 Schema
115+
82116
When retrieving stats from the local machine (no server specified):
83117
```
84-
ipmi_sensor,unit=degrees_c,name=ambient_temp status=1i,value=20 1458488465012559455
85-
ipmi_sensor,unit=feet,name=altitude status=1i,value=80 1458488465012688613
86-
ipmi_sensor,unit=watts,name=avg_power status=1i,value=220 1458488465012776511
87-
ipmi_sensor,unit=volts,name=planar_3.3v status=1i,value=3.28 1458488465012861875
88-
ipmi_sensor,unit=volts,name=planar_vbat status=1i,value=3.04 1458488465013072508
89-
ipmi_sensor,unit=rpm,name=fan_1a_tach status=1i,value=2610 1458488465013137932
90-
ipmi_sensor,unit=rpm,name=fan_1b_tach status=1i,value=1775 1458488465013279896
118+
ipmi_sensor,name=uid_light,entity_id=23.1,status_code=ok,status_desc=ok value=0 1517125474000000000
119+
ipmi_sensor,name=sys._health_led,entity_id=23.2,status_code=ok,status_desc=ok value=0 1517125474000000000
120+
ipmi_sensor,entity_id=10.1,name=power_supply_1,status_code=ok,status_desc=presence_detected,unit=watts value=110 1517125474000000000
121+
ipmi_sensor,name=power_supply_2,entity_id=10.2,status_code=ok,unit=watts,status_desc=presence_detected value=125 1517125474000000000
122+
ipmi_sensor,name=power_supplies,entity_id=10.3,status_code=ok,status_desc=fully_redundant value=0 1517125474000000000
123+
ipmi_sensor,entity_id=7.1,name=fan_1,status_code=ok,status_desc=transition_to_running,unit=percent value=43.12 1517125474000000000
91124
```

plugins/inputs/ipmi_sensor/ipmi.go

Lines changed: 115 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,11 @@
11
package ipmi_sensor
22

33
import (
4+
"bufio"
5+
"bytes"
46
"fmt"
57
"os/exec"
8+
"regexp"
69
"strconv"
710
"strings"
811
"sync"
@@ -14,14 +17,20 @@ import (
1417
)
1518

1619
var (
	// execCommand is used to mock commands in tests.
	execCommand = exec.Command

	// re_v1_parse_line splits one line of `ipmitool sdr` output, e.g.
	//   Planar VBAT | 3.05 Volts | ok
	// into the named groups name, description and status_code.
	re_v1_parse_line = regexp.MustCompile(`^(?P<name>[^|]*)\|(?P<description>[^|]*)\|(?P<status_code>.*)`)

	// re_v2_parse_line splits one line of `ipmitool sdr elist` output, e.g.
	//   Temp | 0Eh | ok | 3.1 | 55 degrees C
	// into the named groups name, status_code, entity_id and (optionally)
	// description. The second column is matched but not captured.
	re_v2_parse_line = regexp.MustCompile(`^(?P<name>[^|]*)\|[^|]+\|(?P<status_code>[^|]*)\|(?P<entity_id>[^|]*)\|(?:(?P<description>[^|]+))?`)

	// re_v2_parse_description classifies the description column. A leading
	// number followed by whitespace yields analogValue/analogUnit; any other
	// non-empty text yields status; an empty description matches with all
	// groups empty. The optional leading "-" lets negative readings (such as
	// sub-zero temperatures) be recognised as analog values instead of being
	// treated as a status string.
	re_v2_parse_description = regexp.MustCompile(`^(?P<analogValue>-?[0-9.]+)\s(?P<analogUnit>.*)|(?P<status>.+)|^$`)

	// re_v2_parse_unit separates the unit from an optional, comma-delimited
	// extended status that follows it, e.g. "Watts, Presence detected".
	re_v2_parse_unit = regexp.MustCompile(`^(?P<realAnalogUnit>[^,]+)(?:,\s*(?P<statusDesc>.*))?`)
)

27+
// Ipmi stores the configuration values for the ipmi_sensor input plugin.
2028
type Ipmi struct {
21-
Path string
22-
Privilege string
23-
Servers []string
24-
Timeout internal.Duration
29+
// Path is the ipmitool executable that gets run; Gather returns an
// "ipmitool not found" error when it is empty.
Path string
30+
// Privilege is not referenced in the visible code — presumably the IPMI
// privilege level used for remote connections; TODO confirm.
Privilege string
31+
// Servers is not referenced in the visible code — presumably the remote
// hosts to query; TODO confirm.
Servers []string
32+
// Timeout bounds how long the ipmitool command is allowed to run.
Timeout internal.Duration
33+
// MetricVersion selects the output schema: 2 runs "sdr elist" and parses the
// result with parseV2; any other value uses plain "sdr" and parseV1.
MetricVersion int
2534
}
2635

2736
var sampleConfig = `
@@ -46,16 +55,22 @@ var sampleConfig = `
4655
4756
## Timeout for the ipmitool command to complete
4857
timeout = "20s"
58+
59+
## Schema Version: (Optional, defaults to version 1)
60+
metric_version = 2
4961
`
5062

63+
// SampleConfig returns the plugin's sample configuration, i.e. the contents
// of the package-level sampleConfig string.
5164
func (m *Ipmi) SampleConfig() string {
5265
return sampleConfig
5366
}
5467

68+
// Description returns a fixed one-line summary of what the plugin does.
5569
func (m *Ipmi) Description() string {
5670
return "Read metrics from the bare metal servers via IPMI"
5771
}
5872

73+
// Gather is the main execution function for the plugin
5974
func (m *Ipmi) Gather(acc telegraf.Accumulator) error {
6075
if len(m.Path) == 0 {
6176
return fmt.Errorf("ipmitool not found: verify that ipmitool is installed and that ipmitool is in your PATH")
@@ -93,23 +108,33 @@ func (m *Ipmi) parse(acc telegraf.Accumulator, server string) error {
93108
opts = conn.options()
94109
}
95110
opts = append(opts, "sdr")
111+
if m.MetricVersion == 2 {
112+
opts = append(opts, "elist")
113+
}
96114
cmd := execCommand(m.Path, opts...)
97115
out, err := internal.CombinedOutputTimeout(cmd, m.Timeout.Duration)
116+
timestamp := time.Now()
98117
if err != nil {
99118
return fmt.Errorf("failed to run command %s: %s - %s", strings.Join(cmd.Args, " "), err, string(out))
100119
}
120+
if m.MetricVersion == 2 {
121+
return parseV2(acc, hostname, out, timestamp)
122+
}
123+
return parseV1(acc, hostname, out, timestamp)
124+
}
101125

126+
func parseV1(acc telegraf.Accumulator, hostname string, cmdOut []byte, measured_at time.Time) error {
102127
// each line will look something like
103128
// Planar VBAT | 3.05 Volts | ok
104-
lines := strings.Split(string(out), "\n")
105-
for i := 0; i < len(lines); i++ {
106-
vals := strings.Split(lines[i], "|")
107-
if len(vals) != 3 {
129+
scanner := bufio.NewScanner(bytes.NewReader(cmdOut))
130+
for scanner.Scan() {
131+
ipmiFields := extractFieldsFromRegex(re_v1_parse_line, scanner.Text())
132+
if len(ipmiFields) != 3 {
108133
continue
109134
}
110135

111136
tags := map[string]string{
112-
"name": transform(vals[0]),
137+
"name": transform(ipmiFields["name"]),
113138
}
114139

115140
// tag the server if we have one
@@ -118,38 +143,106 @@ func (m *Ipmi) parse(acc telegraf.Accumulator, server string) error {
118143
}
119144

120145
fields := make(map[string]interface{})
121-
if strings.EqualFold("ok", trim(vals[2])) {
146+
if strings.EqualFold("ok", trim(ipmiFields["status_code"])) {
122147
fields["status"] = 1
123148
} else {
124149
fields["status"] = 0
125150
}
126151

127-
val1 := trim(vals[1])
128-
129-
if strings.Index(val1, " ") > 0 {
152+
if strings.Index(ipmiFields["description"], " ") > 0 {
130153
// split middle column into value and unit
131-
valunit := strings.SplitN(val1, " ", 2)
132-
fields["value"] = Atofloat(valunit[0])
154+
valunit := strings.SplitN(ipmiFields["description"], " ", 2)
155+
var err error
156+
fields["value"], err = aToFloat(valunit[0])
157+
if err != nil {
158+
continue
159+
}
133160
if len(valunit) > 1 {
134161
tags["unit"] = transform(valunit[1])
135162
}
136163
} else {
137164
fields["value"] = 0.0
138165
}
139166

140-
acc.AddFields("ipmi_sensor", fields, tags, time.Now())
167+
acc.AddFields("ipmi_sensor", fields, tags, measured_at)
141168
}
142169

143-
return nil
170+
return scanner.Err()
144171
}
145172

146-
func Atofloat(val string) float64 {
173+
func parseV2(acc telegraf.Accumulator, hostname string, cmdOut []byte, measured_at time.Time) error {
174+
// each line will look something like
175+
// CMOS Battery | 65h | ok | 7.1 |
176+
// Temp | 0Eh | ok | 3.1 | 55 degrees C
177+
// Drive 0 | A0h | ok | 7.1 | Drive Present
178+
scanner := bufio.NewScanner(bytes.NewReader(cmdOut))
179+
for scanner.Scan() {
180+
ipmiFields := extractFieldsFromRegex(re_v2_parse_line, scanner.Text())
181+
if len(ipmiFields) < 3 || len(ipmiFields) > 4 {
182+
continue
183+
}
184+
185+
tags := map[string]string{
186+
"name": transform(ipmiFields["name"]),
187+
}
188+
189+
// tag the server is we have one
190+
if hostname != "" {
191+
tags["server"] = hostname
192+
}
193+
tags["entity_id"] = transform(ipmiFields["entity_id"])
194+
tags["status_code"] = trim(ipmiFields["status_code"])
195+
fields := make(map[string]interface{})
196+
descriptionResults := extractFieldsFromRegex(re_v2_parse_description, trim(ipmiFields["description"]))
197+
// This is an analog value with a unit
198+
if descriptionResults["analogValue"] != "" && len(descriptionResults["analogUnit"]) >= 1 {
199+
var err error
200+
fields["value"], err = aToFloat(descriptionResults["analogValue"])
201+
if err != nil {
202+
continue
203+
}
204+
// Some implementations add an extra status to their analog units
205+
unitResults := extractFieldsFromRegex(re_v2_parse_unit, descriptionResults["analogUnit"])
206+
tags["unit"] = transform(unitResults["realAnalogUnit"])
207+
if unitResults["statusDesc"] != "" {
208+
tags["status_desc"] = transform(unitResults["statusDesc"])
209+
}
210+
} else {
211+
// This is a status value
212+
fields["value"] = 0.0
213+
// Extended status descriptions aren't required, in which case for consistency re-use the status code
214+
if descriptionResults["status"] != "" {
215+
tags["status_desc"] = transform(descriptionResults["status"])
216+
} else {
217+
tags["status_desc"] = transform(ipmiFields["status_code"])
218+
}
219+
}
220+
221+
acc.AddFields("ipmi_sensor", fields, tags, measured_at)
222+
}
223+
224+
return scanner.Err()
225+
}
226+
227+
// extractFieldsFromRegex consumes a regex with named capture groups and returns a kvp map of strings with the results
228+
func extractFieldsFromRegex(re *regexp.Regexp, input string) map[string]string {
229+
submatches := re.FindStringSubmatch(input)
230+
results := make(map[string]string)
231+
for i, name := range re.SubexpNames() {
232+
if name != input && name != "" && input != "" {
233+
results[name] = trim(submatches[i])
234+
}
235+
}
236+
return results
237+
}
238+
239+
// aToFloat parses val as a 64-bit floating point number. On failure it
// returns 0.0 together with the parse error, never a partial result.
func aToFloat(val string) (float64, error) {
	parsed, parseErr := strconv.ParseFloat(val, 64)
	if parseErr == nil {
		return parsed, nil
	}
	return 0.0, parseErr
}
154247

155248
func trim(s string) string {

0 commit comments

Comments
 (0)