diff --git a/README.md b/README.md
index 5b6e344..7a15a71 100644
--- a/README.md
+++ b/README.md
@@ -405,7 +405,7 @@ An example Vagrant project has been included to get you started right away.
check_docker |
- maartenbeeckmans |
+ Maarten Beeckmans |
upstream |
@@ -413,6 +413,56 @@ An example Vagrant project has been included to get you started right away.
Robin Ophalvens |
|
+
+ check_ceph_df |
+ Maarten Beeckmans |
+ upstream |
+
+
+ check_ceph_health |
+ Maarten Beeckmans |
+ upstream |
+
+
+ check_ceph_mds |
+ Maarten Beeckmans |
+ upstream |
+
+
+ check_ceph_mgr |
+ Maarten Beeckmans |
+ upstream |
+
+
+ check_ceph_mon |
+ Maarten Beeckmans |
+ upstream |
+
+
+ check_ceph_osd |
+ Maarten Beeckmans |
+ upstream |
+
+
+ check_ceph_osd_db |
+ Maarten Beeckmans |
+ upstream |
+
+
+ check_ceph_osd_df |
+ Maarten Beeckmans |
+ upstream |
+
+
+ check_ceph_rgw |
+ Maarten Beeckmans |
+ upstream |
+
+
+ check_ceph_rgw_api |
+ Maarten Beeckmans |
+ upstream |
+
### Contributions
diff --git a/build.txt b/build.txt
index 9d1a665..850e3e9 100644
--- a/build.txt
+++ b/build.txt
@@ -102,4 +102,14 @@ check_mysql-replication-configured 0.2
check_s3-bucket 0.1
check_docker 2.3
check_wireguard 1.0
+check_ceph_df 1.7.1
+check_ceph_health 1.7.0
+check_ceph_mds 1.6.0
+check_ceph_mgr 1.0.0
+check_ceph_mon 1.5.0
+check_ceph_osd 1.5.2
+check_ceph_osd_db 1.0.0
+check_ceph_osd_df 1.0.0
+check_ceph_rgw 1.5.1
+check_ceph_rgw_api 1.7.2
# vim: set ts=2 sw=2 et : #
diff --git a/check_ceph_df b/check_ceph_df
new file mode 100755
index 0000000..e56d682
--- /dev/null
+++ b/check_ceph_df
@@ -0,0 +1,232 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2013 SWITCH http://www.switch.ch
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+import argparse
+import os
+import subprocess
+import sys
+
+__version__ = '1.7.1'
+
+# default ceph values
+CEPH_COMMAND = '/usr/bin/ceph'
+
+# nagios exit code
+STATUS_OK = 0
+STATUS_WARNING = 1
+STATUS_ERROR = 2
+STATUS_UNKNOWN = 3
+
+def main():
+    """Nagios check for the usage reported by 'ceph df'.
+
+    Parses the command-line options, runs the ceph executable with the 'df'
+    sub-command and compares a usage percentage with the -W/--warn and
+    -C/--critical thresholds.  With -p/--pool the %USED value of that pool is
+    checked, otherwise the global raw usage is checked.
+
+    The status line (including perfdata) is printed to stdout.
+
+    Returns:
+        One of the STATUS_* Nagios exit codes.
+    """
+    # parse args
+    parser = argparse.ArgumentParser(description="'ceph df' nagios plugin.")
+    parser.add_argument('-e','--exe', help='ceph executable [%s]' % CEPH_COMMAND)
+    parser.add_argument('-c','--conf', help='alternative ceph conf file')
+    parser.add_argument('-m','--monaddress', help='ceph monitor address[:port]')
+    parser.add_argument('-i','--id', help='ceph client id')
+    parser.add_argument('-n','--name', help='ceph client name')
+    parser.add_argument('-k','--keyring', help='ceph client keyring file')
+    parser.add_argument('-p','--pool', help='ceph pool name')
+    parser.add_argument('-d','--detail', help="show pool details on warn and critical", action='store_true')
+    parser.add_argument('-W','--warn', help="warn above this percent RAW USED", type=float)
+    parser.add_argument('-C','--critical', help="critical alert above this percent RAW USED", type=float)
+    parser.add_argument('-V','--version', help='show version and exit', action='store_true')
+    args = parser.parse_args()
+
+    # answer -V first so it also works on hosts without a ceph executable
+    if args.version:
+        print('version %s' % __version__)
+        return STATUS_OK
+
+    # validate args
+    ceph_exec = args.exe if args.exe else CEPH_COMMAND
+    if not os.path.exists(ceph_exec):
+        print("ERROR: ceph executable '%s' doesn't exist" % ceph_exec)
+        return STATUS_UNKNOWN
+
+    if args.conf and not os.path.exists(args.conf):
+        print("ERROR: ceph conf file '%s' doesn't exist" % args.conf)
+        return STATUS_UNKNOWN
+
+    if args.keyring and not os.path.exists(args.keyring):
+        print("ERROR: keyring file '%s' doesn't exist" % args.keyring)
+        return STATUS_UNKNOWN
+
+    # compare with None: a threshold of 0 is a valid value, not "unset"
+    if args.warn is None or args.critical is None or args.warn > args.critical:
+        print("ERROR: warn and critical level must be set and critical must be greater than warn")
+        return STATUS_UNKNOWN
+
+    # build command
+    ceph_df = [ceph_exec]
+    if args.monaddress:
+        ceph_df.extend(['-m', args.monaddress])
+    if args.conf:
+        ceph_df.extend(['-c', args.conf])
+    if args.id:
+        ceph_df.extend(['--id', args.id])
+    if args.name:
+        ceph_df.extend(['--name', args.name])
+    if args.keyring:
+        ceph_df.extend(['--keyring', args.keyring])
+    ceph_df.append('df')
+
+    # exec command; communicate() hands back bytes for both streams
+    p = subprocess.Popen(ceph_df, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    output, err = p.communicate()
+
+    if output:
+        result = output.decode('utf-8').splitlines()
+        # values for GLOBAL are in 3rd line of output
+        if len(result) < 3:
+            print("ERROR: could not parse 'ceph df' output")
+            return STATUS_UNKNOWN
+        # pool output starts in line 4 with the bare word POOLS: followed by the output
+        poollines = result[3:]
+        try:
+            if args.pool:
+                return _check_pool(args, poollines)
+            return _check_global(args, result[2].split(), poollines)
+        except (IndexError, ValueError):
+            # unknown column layout or a non-numeric percentage column
+            print("ERROR: could not parse 'ceph df' output")
+            return STATUS_UNKNOWN
+    elif err:
+        # read only first line of error; decode because stderr is bytes
+        one_line = err.decode('utf-8', 'replace').split('\n')[0]
+        if '-1 ' in one_line:
+            idx = one_line.rfind('-1 ')
+            print('ERROR: %s: %s' % (ceph_exec, one_line[idx+len('-1 '):]))
+        else:
+            print(one_line)
+
+    return STATUS_UNKNOWN
+
+
+def _normalize_global_values(vals):
+    """Reduce the split global row to four items; index 3 is the usage percentage."""
+    # Luminous vs Mimic output (27.3TiB vs 27.3 TiB)
+    if len(vals) == 7:
+        return ["{}{}".format(vals[0], vals[1]),
+                "{}{}".format(vals[2], vals[3]),
+                "{}{}".format(vals[4], vals[5]),
+                vals[6]]
+    # Nautilus output
+    if len(vals) == 10:
+        return ["{}{}".format(vals[1], vals[2]),
+                "{}{}".format(vals[3], vals[4]),
+                "{}{}".format(vals[5], vals[6]),
+                vals[9]]
+    # anything else is used as is (value and unit already in one token)
+    return vals
+
+
+def _normalize_pool_values(vals):
+    """Reduce a split pool row to [NAME, ID, USED, %USED, MAX AVAIL]."""
+    # Luminous vs Mimic output (27.3TiB vs 27.3 TiB)
+    if len(vals) == 8:
+        return [vals[0], vals[1],
+                "{}{}".format(vals[2], vals[3]),
+                vals[4],
+                "{}{}".format(vals[5], vals[6])]
+    # Nautilus output
+    if len(vals) == 10:
+        return [vals[0], vals[1],
+                "{}{}".format(vals[2], vals[3]),
+                vals[7],
+                "{}{}".format(vals[8], vals[9])]
+    # Octopus >= v15.2.8 (pgs added to ceph-df)
+    if len(vals) == 11:
+        return [vals[0], vals[1],
+                "{}{}".format(vals[3], vals[4]),
+                vals[8],
+                "{}{}".format(vals[9], vals[10])]
+    return vals
+
+
+def _check_pool(args, poollines):
+    """Report the %USED value of pool args.pool against the thresholds."""
+    for line in poollines:
+        fields = line.split()
+        # compare the name column exactly; a substring test on the whole line
+        # would pick 'rbd-backup' (or any line containing the text) for 'rbd'
+        if not fields or fields[0] != args.pool:
+            continue
+
+        poolvals = _normalize_pool_values(fields)
+        pool_used = poolvals[2]
+        pool_usage_percent = float(poolvals[3])
+
+        if pool_usage_percent > args.critical:
+            print('CRITICAL: %s%% usage in Pool \'%s\' is above %s%% (%s used) | Usage=%s%%;%s;%s;;' % (pool_usage_percent, args.pool, args.critical, pool_used, pool_usage_percent, args.warn, args.critical))
+            return STATUS_ERROR
+        if pool_usage_percent > args.warn:
+            print('WARNING: %s%% usage in Pool \'%s\' is above %s%% (%s used) | Usage=%s%%;%s;%s;;' % (pool_usage_percent, args.pool, args.warn, pool_used, pool_usage_percent, args.warn, args.critical))
+            return STATUS_WARNING
+        print('%s%% usage in Pool \'%s\' | Usage=%s%%;%s;%s;;' % (pool_usage_percent, args.pool, pool_usage_percent, args.warn, args.critical))
+        return STATUS_OK
+
+    # never leave the monitoring system with an empty status line
+    print("ERROR: pool '%s' not found in 'ceph df' output" % args.pool)
+    return STATUS_UNKNOWN
+
+
+def _check_global(args, globalvals, poollines):
+    """Report the global raw usage percentage against the thresholds."""
+    globalvals = _normalize_global_values(globalvals)
+    # 4th element contains the percentage value
+    global_usage_percent = float(globalvals[3])
+    global_available_space = globalvals[1]
+    global_total_space = globalvals[0]
+
+    # the pool listing is only appended to WARNING/CRITICAL lines, and only
+    # when -d/--detail was given
+    poolout = '\n '.join(['\n'] + poollines) if args.detail else ''
+
+    if global_usage_percent > args.critical:
+        print('CRITICAL: global RAW usage of %s%% is above %s%% (%s of %s free)%s | Usage=%s%%;%s;%s;;' % (global_usage_percent, args.critical, global_available_space, global_total_space, poolout, global_usage_percent, args.warn, args.critical))
+        return STATUS_ERROR
+    if global_usage_percent > args.warn:
+        print('WARNING: global RAW usage of %s%% is above %s%% (%s of %s free)%s | Usage=%s%%;%s;%s;;' % (global_usage_percent, args.warn, global_available_space, global_total_space, poolout, global_usage_percent, args.warn, args.critical))
+        return STATUS_WARNING
+    print('RAW usage %s%% | Usage=%s%%;%s;%s;;' % (global_usage_percent, global_usage_percent, args.warn, args.critical))
+    return STATUS_OK
+
+
+if __name__ == "__main__":
+ sys.exit(main())
diff --git a/check_ceph_health b/check_ceph_health
new file mode 100755
index 0000000..f160299
--- /dev/null
+++ b/check_ceph_health
@@ -0,0 +1,200 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2013-2016 SWITCH http://www.switch.ch
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+import argparse
+import os
+import subprocess
+import sys
+import re
+import json
+
+__version__ = '1.7.0'
+
+# default ceph values
+CEPH_ADM_COMMAND = '/usr/sbin/cephadm'
+CEPH_COMMAND = '/usr/bin/ceph'
+
+# nagios exit code
+STATUS_OK = 0
+STATUS_WARNING = 1
+STATUS_ERROR = 2
+STATUS_UNKNOWN = 3
+
+
+def main():
+    """Nagios check for the cluster state reported by 'ceph health'.
+
+    Parses the command-line options, runs 'ceph health [detail] --format json'
+    (through 'cephadm shell' when -a/--cephadm is given) and turns the JSON
+    answer into a status line on stdout.  Additional findings are printed on
+    the following lines.
+
+    Returns:
+        One of the STATUS_* Nagios exit codes.
+    """
+    # parse args
+    parser = argparse.ArgumentParser(description="'ceph health' nagios plugin.")
+    parser.add_argument('-e','--exe', help='ceph executable [%s]' % CEPH_COMMAND)
+    parser.add_argument('-A','--admexe', help='cephadm executable [%s]' % CEPH_ADM_COMMAND)
+    parser.add_argument('--cluster', help='ceph cluster name')
+    parser.add_argument('-c','--conf', help='alternative ceph conf file')
+    parser.add_argument('-m','--monaddress', help='ceph monitor address[:port]')
+    parser.add_argument('-i','--id', help='ceph client id')
+    parser.add_argument('-n','--name', help='ceph client name')
+    parser.add_argument('-k','--keyring', help='ceph client keyring file')
+    parser.add_argument('--check', help='regexp of which check(s) to check (luminous+) '
+                        "Can be inverted, e.g. '^((?!(PG_DEGRADED|OBJECT_MISPLACED)$).)*$'")
+    parser.add_argument('-w','--whitelist', help='whitelist regexp for ceph health warnings')
+    parser.add_argument('-d','--detail', help="exec 'ceph health detail'", action='store_true')
+    parser.add_argument('-V','--version', help='show version and exit', action='store_true')
+    parser.add_argument('-a','--cephadm', help='uses cephadm to execute the command', action='store_true')
+    parser.add_argument('-s','--skip-muted', help='skip muted checks', action='store_true')
+    args = parser.parse_args()
+
+    # answer -V first so it also works on hosts without a ceph executable
+    if args.version:
+        print('version %s' % __version__)
+        return STATUS_OK
+
+    # validate args
+    cephadm_exec = args.admexe if args.admexe else CEPH_ADM_COMMAND
+    ceph_exec = args.exe if args.exe else CEPH_COMMAND
+
+    if args.cephadm:
+        if not os.path.exists(cephadm_exec):
+            print("ERROR: cephadm executable '%s' doesn't exist" % cephadm_exec)
+            return STATUS_UNKNOWN
+    else:
+        if not os.path.exists(ceph_exec):
+            print("ERROR: ceph executable '%s' doesn't exist" % ceph_exec)
+            return STATUS_UNKNOWN
+
+    if args.conf and not os.path.exists(args.conf):
+        print("ERROR: ceph conf file '%s' doesn't exist" % args.conf)
+        return STATUS_UNKNOWN
+
+    if args.keyring and not os.path.exists(args.keyring):
+        print("ERROR: keyring file '%s' doesn't exist" % args.keyring)
+        return STATUS_UNKNOWN
+
+    # build command
+    ceph_health = [ceph_exec]
+
+    if args.cephadm:
+        # Prepend the command with the cephadm binary and the shell command
+        ceph_health = [cephadm_exec, 'shell'] + ceph_health
+
+    if args.monaddress:
+        ceph_health.extend(['-m', args.monaddress])
+    if args.cluster:
+        ceph_health.extend(['--cluster', args.cluster])
+    if args.conf:
+        ceph_health.extend(['-c', args.conf])
+    if args.id:
+        ceph_health.extend(['--id', args.id])
+    if args.name:
+        ceph_health.extend(['--name', args.name])
+    if args.keyring:
+        ceph_health.extend(['--keyring', args.keyring])
+    ceph_health.append('health')
+    if args.detail:
+        ceph_health.append('detail')
+
+    ceph_health.extend(['--format', 'json'])
+
+    # exec command; communicate() hands back bytes for both streams
+    p = subprocess.Popen(ceph_health, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    output, err = p.communicate()
+    try:
+        # UnicodeDecodeError is a ValueError, so undecodable output is treated
+        # like unparsable JSON
+        output = json.loads(output.decode('utf-8'))
+    except ValueError:
+        output = dict()
+
+    # parse output
+    if output:
+        ret = STATUS_OK
+        msg = ""
+        extended = []
+        if 'checks' in output:
+            # luminous
+            for check, status in output['checks'].items():
+                # skip check if not selected
+                if args.check and not re.search(args.check, check):
+                    continue
+
+                if args.skip_muted and status.get('muted'):
+                    continue
+
+                check_detail = "%s( %s )" % (check, status['summary']['message'])
+
+                if status["severity"] == "HEALTH_ERR":
+                    # keep what was the headline so far, but never push an
+                    # empty string (it would print a blank line)
+                    if msg:
+                        extended.append(msg)
+                    msg = "CRITICAL: %s" % check_detail
+                    ret = STATUS_ERROR
+                    continue
+
+                if args.whitelist and re.search(args.whitelist, status['summary']['message']):
+                    continue
+
+                check_msg = "WARNING: %s" % check_detail
+                if not msg:
+                    msg = check_msg
+                    ret = STATUS_WARNING
+                else:
+                    extended.append(check_msg)
+        else:
+            # pre-luminous: 'summary' is a list of dicts that carry the text
+            # under 'summary'.
+            # NOTE(review): the severity is assumed to live under 'severity'
+            # with the same HEALTH_* values as on luminous -- confirm against
+            # a pre-luminous cluster.
+            for status in output.get("summary", []):
+                severity = status.get('severity')
+                if severity == "HEALTH_OK":
+                    continue
+
+                if severity in ("HEALTH_ERR", "HEALTH_ERROR"):
+                    if msg:
+                        extended.append(msg)
+                    msg = "CRITICAL: %s" % status['summary']
+                    ret = STATUS_ERROR
+                    continue
+
+                if args.whitelist and re.search(args.whitelist, status['summary']):
+                    continue
+
+                if not msg:
+                    msg = "WARNING: %s" % status['summary']
+                    ret = STATUS_WARNING
+                else:
+                    extended.append("WARNING: %s" % status['summary'])
+
+        if msg:
+            print(msg)
+        else:
+            print("HEALTH OK")
+        if extended:
+            print('\n'.join(extended))
+        return ret
+
+    elif err:
+        # read only first line of error; decode because stderr is bytes
+        one_line = err.decode('utf-8', 'replace').split('\n')[0]
+        if '-1 ' in one_line:
+            idx = one_line.rfind('-1 ')
+            print('ERROR: %s: %s' % (ceph_exec, one_line[idx+len('-1 '):]))
+        else:
+            print(one_line)
+
+    return STATUS_UNKNOWN
+
+
+if __name__ == "__main__":
+ sys.exit(main())
diff --git a/check_ceph_mds b/check_ceph_mds
new file mode 100755
index 0000000..40432fd
--- /dev/null
+++ b/check_ceph_mds
@@ -0,0 +1,188 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2013 Catalyst IT http://www.catalyst.net.nz
+# Copyright (c) 2015 SWITCH http://www.switch.ch
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+from __future__ import print_function
+import argparse
+import socket
+import os
+import re
+import subprocess
+import sys
+import json
+
+__version__ = '1.6.0'
+
+# default ceph values
+CEPH_EXEC = '/usr/bin/ceph'
+CEPH_COMMAND = 'mds stat -f json'
+
+# nagios exit code
+STATUS_OK = 0
+STATUS_WARNING = 1
+STATUS_ERROR = 2
+STATUS_UNKNOWN = 3
+
+def main():
+    """Nagios check for a single MDS daemon based on 'ceph mds stat -f json'.
+
+    Parses the command-line options, runs the ceph executable with
+    CEPH_COMMAND, parses the JSON answer and hands it to check_target_mds()
+    together with the requested filesystem and daemon name.
+
+    Returns:
+        One of the STATUS_* Nagios exit codes.
+    """
+    # parse args
+    parser = argparse.ArgumentParser(description="'ceph mds stat' nagios plugin.")
+    parser.add_argument('-e','--exe', help='ceph executable [%s]' % CEPH_EXEC)
+    parser.add_argument('-c','--conf', help='alternative ceph conf file')
+    parser.add_argument('-m','--monaddress', help='ceph monitor to use for queries (address[:port])')
+    parser.add_argument('-i','--id', help='ceph client id')
+    parser.add_argument('-k','--keyring', help='ceph client keyring file')
+    # action='version' prints and exits with status 0 while parsing, i.e. before
+    # argparse complains about the required options below; with a plain
+    # store_true flag '-V' on its own could never succeed
+    parser.add_argument('-V','--version', help='show version and exit', action='version',
+                        version='version %s' % __version__)
+    parser.add_argument('-n','--name', help='mds daemon name', required=True)
+    parser.add_argument('-f','--filesystem', help='mds filesystem name', required=True)
+    args = parser.parse_args()
+
+    # validate args
+    ceph_exec = args.exe if args.exe else CEPH_EXEC
+    if not os.path.exists(ceph_exec):
+        print("MDS ERROR: ceph executable '%s' doesn't exist" % ceph_exec)
+        return STATUS_UNKNOWN
+
+    if args.conf and not os.path.exists(args.conf):
+        print("MDS ERROR: ceph conf file '%s' doesn't exist" % args.conf)
+        return STATUS_UNKNOWN
+
+    if args.keyring and not os.path.exists(args.keyring):
+        print("MDS ERROR: keyring file '%s' doesn't exist" % args.keyring)
+        return STATUS_UNKNOWN
+
+    # build command
+    ceph_cmd = [ceph_exec]
+    if args.monaddress:
+        ceph_cmd.extend(['-m', args.monaddress])
+    if args.conf:
+        ceph_cmd.extend(['-c', args.conf])
+    if args.id:
+        ceph_cmd.extend(['--id', args.id])
+    if args.keyring:
+        ceph_cmd.extend(['--keyring', args.keyring])
+    ceph_cmd.extend(CEPH_COMMAND.split(' '))
+
+    # exec command; communicate() hands back bytes for both streams
+    p = subprocess.Popen(ceph_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    output, err = p.communicate()
+
+    if p.returncode != 0 or not output:
+        # decode so the message shows the text, not a b'...' repr
+        print("MDS ERROR: %s" % err.decode('utf-8', 'replace').strip())
+        return STATUS_ERROR
+
+    # load json output and parse
+    try:
+        mds_stat = json.loads(output.decode('utf-8'))
+    except Exception as e:
+        print("MDS ERROR: could not parse '%s' output: %s: %s" % (CEPH_COMMAND, output, e))
+        return STATUS_UNKNOWN
+
+    return check_target_mds(mds_stat, args.filesystem, args.name)
+
+def check_target_mds(mds_stat, fs_name, name):
+    """Report the state of MDS daemon `name` and return the Nagios status.
+
+    Args:
+        mds_stat: parsed JSON answer of CEPH_COMMAND.
+        fs_name: filesystem whose MDS map is searched when the daemon is not
+            a standby.
+        name: name of the MDS daemon to look for.
+
+    Returns:
+        STATUS_OK for a standby or a non-laggy daemon of the filesystem,
+        STATUS_WARNING for a laggy one, STATUS_ERROR when the daemon or the
+        filesystem cannot be found.
+    """
+    # find mds from standby list
+    for mds in _get_standby_mds(mds_stat):
+        if mds.get_name() == name:
+            print("MDS OK: %s" % (mds))
+            return STATUS_OK
+
+    # find mds from active list
+    active_mdss = _get_active_mds(mds_stat, fs_name)
+
+    # _get_active_mds() returns None only when the filesystem is unknown; an
+    # empty list means the filesystem exists but lists no daemon, which must
+    # be reported as a missing MDS, not as a missing filesystem
+    if active_mdss is None:
+        # fs not found in map, perhaps user input error
+        print("MDS ERROR: FS '%s' is not found in fsmap" % (fs_name))
+        return STATUS_ERROR
+
+    for mds in active_mdss:
+        if mds.get_name() != name:
+            continue
+        # target mds in active list
+        print("MDS %s: %s" % ("WARN" if mds.is_laggy() else "OK", mds))
+        return STATUS_WARNING if mds.is_laggy() else STATUS_OK
+
+    # mds not found
+    print("MDS ERROR: MDS '%s' is not found (offline?)" % (name))
+    return STATUS_ERROR
+
+def _get_standby_mds(mds_stat):
+    """Return one MDS object per entry of mds_stat['fsmap']['standbys'].
+
+    Only 'name' and 'state' are carried over; the objects are created without
+    laggy_since, so is_laggy() is False for every standby returned here.
+    """
+    return [MDS(mds['name'], mds['state']) for mds in mds_stat['fsmap']['standbys']]
+
+def _get_active_mds(mds_stat, fs_name):
+    """Return the MDS daemons listed for filesystem `fs_name`.
+
+    Walks mds_stat['fsmap']['filesystems'] and, for the first entry whose
+    mdsmap 'fs_name' equals `fs_name`, builds one MDS object per item of that
+    mdsmap's 'info' mapping.
+
+    Returns:
+        A list of MDS objects (possibly empty), or None when no filesystem
+        with that name exists.
+    """
+    for filesystem in mds_stat['fsmap']['filesystems']:
+        mdsmap = filesystem['mdsmap']
+        if mdsmap['fs_name'] == fs_name:
+            daemons = []
+            entries = mdsmap['info']
+            for gid in entries:
+                entry = entries[gid]
+                # laggy_since is optional in an entry
+                laggy_since = entry['laggy_since'] if 'laggy_since' in entry else None
+                daemons.append(MDS(entry['name'], entry['state'], laggy_since))
+            return daemons
+
+    # no fs found
+    return None
+
+class MDS(object):
+    """Name, state and optional laggy marker of one MDS daemon."""
+
+    def __init__(self, name, state, laggy_since=None):
+        # laggy_since stays None unless the caller passes a value
+        self.name, self.state, self.laggy_since = name, state, laggy_since
+
+    def get_name(self):
+        """Return the daemon name."""
+        return self.name
+
+    def get_state(self):
+        """Return the daemon state as given to the constructor."""
+        return self.state
+
+    def is_laggy(self):
+        """Return True when a laggy_since value is set."""
+        return not (self.laggy_since is None)
+
+    def __str__(self):
+        """Return the text used in the status line."""
+        suffix = "" if self.laggy_since is None else " (laggy or crashed)"
+        return ("MDS '%s' is %s" % (self.name, self.state)) + suffix
+
+# main
+if __name__ == "__main__":
+ sys.exit(main())
diff --git a/check_ceph_mgr b/check_ceph_mgr
new file mode 100755
index 0000000..29ce8e8
--- /dev/null
+++ b/check_ceph_mgr
@@ -0,0 +1,188 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2018 SWITCH http://www.switch.ch
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+import argparse
+import os
+import subprocess
+import sys
+import json
+
+__version__ = '1.0.0'
+
+# default ceph values
+CEPH_EXEC = '/usr/bin/ceph'
+CEPH_COMMAND = 'mgr dump -f json'
+
+CEPH_MGR_DUMP_EXAMPLE = '''
+$ ceph --version
+ceph version 12.2.7 (3ec878d1e53e1aeb47a9f619c49d9e7c0aa384d5) luminous (stable)
+$ ceph mgr dump -f json|jq .
+{
+ "epoch": 165,
+ "active_gid": 248001409,
+ "active_name": "zhdk0013",
+ "active_addr": "10.10.10.9:6800/810408",
+ "available": true,
+ "standbys": [
+ {
+ "gid": 247991934,
+ "name": "zhdk0009",
+ "available_modules": [
+ "balancer",
+ "dashboard",
+ "influx",
+ "localpool",
+ "prometheus",
+ "restful",
+ "selftest",
+ "status",
+ "zabbix"
+ ]
+ },
+ {
+ "gid": 248011196,
+ "name": "zhdk0025",
+ "available_modules": [
+ "balancer",
+ "dashboard",
+ "influx",
+ "localpool",
+ "prometheus",
+ "restful",
+ "selftest",
+ "status",
+ "zabbix"
+ ]
+ }
+ ],
+ "modules": [
+ "balancer",
+ "restful",
+ "status"
+ ],
+ "available_modules": [
+ "balancer",
+ "dashboard",
+ "influx",
+ "localpool",
+ "prometheus",
+ "restful",
+ "selftest",
+ "status",
+ "zabbix"
+ ],
+ "services": {}
+}
+'''
+
+# nagios exit code
+STATUS_OK = 0
+STATUS_WARNING = 1
+STATUS_ERROR = 2
+STATUS_UNKNOWN = 3
+
+
+def main():
+    """Nagios check for the manager daemons based on 'ceph mgr dump -f json'.
+
+    Parses the command-line options, runs the ceph executable with
+    CEPH_COMMAND and inspects 'active_name' and 'standbys' of the JSON answer
+    (see CEPH_MGR_DUMP_EXAMPLE for a sample).
+
+    Returns:
+        STATUS_OK with an active manager and at least one standby,
+        STATUS_WARNING with an active manager but no standby, STATUS_ERROR
+        without an active manager, STATUS_UNKNOWN on invalid arguments or
+        when the command fails or its output cannot be parsed.
+    """
+    # parse args
+    parser = argparse.ArgumentParser(description="'ceph mgr dump' nagios plugin.")
+    parser.add_argument('-e', '--exe', help='ceph executable [%s]' % CEPH_EXEC)
+    parser.add_argument('-c', '--conf', help='alternative ceph conf file')
+    parser.add_argument('-m', '--monaddress', help='ceph monitor to use for queries (address[:port])')
+    parser.add_argument('-i', '--id', help='ceph client id')
+    parser.add_argument('-n', '--name', help='ceph client name')
+    parser.add_argument('-k', '--keyring', help='ceph client keyring file')
+    parser.add_argument('-V', '--version', help='show version and exit', action='store_true')
+    args = parser.parse_args()
+
+    if args.version:
+        print("version {}".format(__version__))
+        return STATUS_OK
+
+    # validate args
+    ceph_exec = args.exe if args.exe else CEPH_EXEC
+    if not os.path.exists(ceph_exec):
+        print("MGR ERROR: ceph executable '{}' doesn't exist".format(ceph_exec))
+        return STATUS_UNKNOWN
+
+    if args.conf and not os.path.exists(args.conf):
+        print("MGR ERROR: ceph conf file '{}' doesn't exist".format(args.conf))
+        return STATUS_UNKNOWN
+
+    if args.keyring and not os.path.exists(args.keyring):
+        print("MGR ERROR: keyring file '{}' doesn't exist".format(args.keyring))
+        return STATUS_UNKNOWN
+
+    # build command
+    ceph_cmd = [ceph_exec]
+    if args.monaddress:
+        ceph_cmd.extend(['-m', args.monaddress])
+    if args.conf:
+        ceph_cmd.extend(['-c', args.conf])
+    if args.id:
+        ceph_cmd.extend(['--id', args.id])
+    if args.name:
+        ceph_cmd.extend(['--name', args.name])
+    if args.keyring:
+        ceph_cmd.extend(['--keyring', args.keyring])
+    ceph_cmd.extend(CEPH_COMMAND.split(' '))
+
+    # exec command; communicate() hands back bytes for both streams
+    p = subprocess.Popen(ceph_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    output, err = p.communicate()
+
+    if p.returncode != 0 or not output:
+        # decode so the message shows the text, not a b'...' repr
+        print("MGR ERROR: {}".format(err.decode('utf-8', 'replace').strip()))
+        return STATUS_UNKNOWN
+
+    # load json output and parse
+    try:
+        mgr_dump = json.loads(output.decode('utf-8'))
+    except Exception as e:
+        print("MGR ERROR: could not parse '{}' output: {}: {}".format(ceph_cmd, output, e))
+        return STATUS_UNKNOWN
+
+    # check active: a missing key and an empty name both mean "no active mgr"
+    # NOTE(review): ceph presumably reports an empty active_name when no
+    # manager is active -- confirm against a cluster without active mgr
+    active_mgr_name = mgr_dump.get('active_name')
+    if not active_mgr_name:
+        print("MGR CRITICAL: not active mgr found")
+        print("JSON: {}".format(json.dumps(mgr_dump)))
+        return STATUS_ERROR
+
+    # check standby; a dump without 'standbys' is handled like an empty list
+    standby_mgr_names = [standby_mgr['name'] for standby_mgr in mgr_dump.get('standbys', [])]
+
+    if not standby_mgr_names:
+        print("MGR WARN: active: {} but no standbys".format(active_mgr_name))
+        return STATUS_WARNING
+
+    print("MGR OK: active: {}, standbys: {}".format(active_mgr_name,
+                                                    ", ".join(standby_mgr_names)))
+    return STATUS_OK
+
+# main
+if __name__ == "__main__":
+ sys.exit(main())
diff --git a/check_ceph_mon b/check_ceph_mon
new file mode 100755
index 0000000..f8decea
--- /dev/null
+++ b/check_ceph_mon
@@ -0,0 +1,163 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2013 Catalyst IT http://www.catalyst.net.nz
+# Copyright (c) 2015 SWITCH http://www.switch.ch
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+import argparse
+import socket
+import os
+import re
+import subprocess
+import sys
+import json
+
+__version__ = '1.5.0'
+
+# default ceph values
+CEPH_EXEC = '/usr/bin/ceph'
+CEPH_COMMAND = 'quorum_status'
+
+# nagios exit code
+STATUS_OK = 0
+STATUS_WARNING = 1
+STATUS_ERROR = 2
+STATUS_UNKNOWN = 3
+
+##
+# ceph quorum_status output example
+##
+ceph_quorum_status_output_example = '''{
+ "quorum_leader_name" : "s0001",
+ "monmap" : {
+ "mons" : [
+ {
+ "name" : "s0001",
+ "addr" : "[2001:620:5ca1:8000::1001]:6789/0",
+ "rank" : 0
+ },
+ {
+ "name" : "s0003",
+ "addr" : "[2001:620:5ca1:8000::1003]:6789/0",
+ "rank" : 1
+ }
+ ],
+ "created" : "2014-12-15 08:28:35.153650",
+ "epoch" : 2,
+ "modified" : "2014-12-15 08:28:40.371878",
+ "fsid" : "22348d2b-b69d-46cc-9a79-ca93cd6bae84"
+ },
+ "quorum_names" : [
+ "s0001",
+ "s0003"
+ ],
+ "quorum" : [
+ 0,
+ 1
+ ],
+ "election_epoch" : 24
+}'''
+
+def main():
+    """Nagios check for one monitor based on 'ceph quorum_status'.
+
+    Parses the command-line options, runs the ceph executable with
+    CEPH_COMMAND and checks that the monitor given with -I/--monid is listed
+    in the monmap and in 'quorum_names' of the JSON answer (see
+    ceph_quorum_status_output_example for a sample).
+
+    Returns:
+        STATUS_OK when the monitor is in quorum, STATUS_WARNING when it is
+        missing from the monmap or from the quorum, STATUS_ERROR when the
+        command fails, STATUS_UNKNOWN on invalid arguments or unparsable
+        output.
+    """
+    # parse args
+    parser = argparse.ArgumentParser(description="'ceph quorum_status' nagios plugin.")
+    parser.add_argument('-e','--exe', help='ceph executable [%s]' % CEPH_EXEC)
+    parser.add_argument('-c','--conf', help='alternative ceph conf file')
+    parser.add_argument('-m','--monaddress', help='ceph monitor to use for queries (address[:port])')
+    parser.add_argument('-i','--id', help='ceph client id')
+    parser.add_argument('-k','--keyring', help='ceph client keyring file')
+    parser.add_argument('-V','--version', help='show version and exit', action='store_true')
+    parser.add_argument('-I','--monid', help='mon ID to be checked for availability')
+    args = parser.parse_args()
+
+    if args.version:
+        print('version %s' % __version__)
+        return STATUS_OK
+
+    # validate args
+    ceph_exec = args.exe if args.exe else CEPH_EXEC
+    if not os.path.exists(ceph_exec):
+        print("MON ERROR: ceph executable '%s' doesn't exist" % ceph_exec)
+        return STATUS_UNKNOWN
+
+    if args.conf and not os.path.exists(args.conf):
+        print("MON ERROR: ceph conf file '%s' doesn't exist" % args.conf)
+        return STATUS_UNKNOWN
+
+    if args.keyring and not os.path.exists(args.keyring):
+        print("MON ERROR: keyring file '%s' doesn't exist" % args.keyring)
+        return STATUS_UNKNOWN
+
+    if not args.monid:
+        print("MON ERROR: no MON ID given, use -I/--monid parameter")
+        return STATUS_UNKNOWN
+
+    # build command
+    ceph_cmd = [ceph_exec]
+    if args.monaddress:
+        ceph_cmd.extend(['-m', args.monaddress])
+    if args.conf:
+        ceph_cmd.extend(['-c', args.conf])
+    if args.id:
+        ceph_cmd.extend(['--id', args.id])
+    if args.keyring:
+        ceph_cmd.extend(['--keyring', args.keyring])
+    ceph_cmd.append(CEPH_COMMAND)
+
+    # exec command; communicate() hands back bytes for both streams
+    p = subprocess.Popen(ceph_cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+    output, err = p.communicate()
+
+    if p.returncode != 0 or not output:
+        # decode so the message shows the text, not a b'...' repr
+        print("MON ERROR: %s" % err.decode('utf-8', 'replace').strip())
+        return STATUS_ERROR
+
+    # load json output and parse
+    try:
+        quorum_status = json.loads(output.decode('utf-8'))
+    except Exception as e:
+        print("MON ERROR: could not parse '%s' output: %s: %s" % (CEPH_COMMAND, output, e))
+        return STATUS_UNKNOWN
+
+    # do our checks
+    mons = quorum_status['monmap']['mons']
+    if not any(mon['name'] == args.monid for mon in mons):
+        print("MON WARN: mon '%s' is not in monmap: %s" % (args.monid, mons))
+        return STATUS_WARNING
+
+    if args.monid in quorum_status['quorum_names']:
+        print("MON OK")
+        return STATUS_OK
+
+    print("MON WARN: no MON '%s' found in quorum" % args.monid)
+    return STATUS_WARNING
+
+# main
+if __name__ == "__main__":
+ sys.exit(main())
diff --git a/check_ceph_osd b/check_ceph_osd
new file mode 100755
index 0000000..2ee9de6
--- /dev/null
+++ b/check_ceph_osd
@@ -0,0 +1,154 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2013 Catalyst IT http://www.catalyst.net.nz
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# 1.5.2 (2019-06-16) Martin Seener: fixed regex to work with Ceph Nautilus (14.2.x)
+
+from __future__ import print_function
+import argparse
+import os
+import re
+import subprocess
+import sys
+import socket
+
+__version__ = '1.5.2'
+
+# default ceph values
+CEPH_COMMAND = '/usr/bin/ceph'
+
+# nagios exit code
+STATUS_OK = 0
+STATUS_WARNING = 1
+STATUS_ERROR = 2
+STATUS_UNKNOWN = 3
+
+def main():
+    """Nagios check: report the OSDs of one host that 'ceph osd dump' lists as down."""
+    # parse args
+    parser = argparse.ArgumentParser(description="'ceph osd' nagios plugin.")
+    parser.add_argument('-e','--exe', help='ceph executable [%s]' % CEPH_COMMAND)
+    parser.add_argument('-c','--conf', help='alternative ceph conf file')
+    parser.add_argument('-m','--monaddress', help='ceph monitor address[:port]')
+    parser.add_argument('-i','--id', help='ceph client id')
+    parser.add_argument('-k','--keyring', help='ceph client keyring file')
+    parser.add_argument('-V','--version', help='show version and exit', action='store_true')
+    parser.add_argument('-H','--host', help='osd host', required=True)
+    parser.add_argument('-I','--osdid', help='osd id', required=False)
+    parser.add_argument('-C','--crit', help='Number of failed OSDs to trigger critical (default=2)',type=int,default=2, required=False)
+    parser.add_argument('-o','--out', help='check osds that are set OUT', default=False, action='store_true', required=False)
+    args = parser.parse_args()
+
+    # validate args (version first, so it works without a ceph executable)
+    if args.version:
+        print('version %s' % __version__)
+        return STATUS_OK
+
+    ceph_exec = args.exe if args.exe else CEPH_COMMAND
+    if not os.path.exists(ceph_exec):
+        print("OSD ERROR: ceph executable '%s' doesn't exist" % ceph_exec)
+        return STATUS_UNKNOWN
+
+    if args.conf and not os.path.exists(args.conf):
+        print("OSD ERROR: ceph conf file '%s' doesn't exist" % args.conf)
+        return STATUS_UNKNOWN
+
+    if args.keyring and not os.path.exists(args.keyring):
+        print("OSD ERROR: keyring file '%s' doesn't exist" % args.keyring)
+        return STATUS_UNKNOWN
+
+    if not args.osdid:
+        args.osdid = '[^ ]*'  # regex fragment: match any OSD id
+
+    if not args.host:
+        print("OSD ERROR: no OSD hostname given")
+        return STATUS_UNKNOWN
+
+    try:
+        addrinfo = socket.getaddrinfo(args.host, None, 0, socket.SOCK_STREAM)
+        args.host = addrinfo[0][-1][0]
+        if addrinfo[0][0] == socket.AF_INET6:
+            args.host = "[%s]" % args.host
+    except (socket.error, UnicodeError, IndexError):
+        print('OSD ERROR: could not resolve %s' % args.host)
+        return STATUS_UNKNOWN
+
+
+    # build command
+    ceph_cmd = [ceph_exec]
+    if args.monaddress:
+        ceph_cmd.append('-m')
+        ceph_cmd.append(args.monaddress)
+    if args.conf:
+        ceph_cmd.append('-c')
+        ceph_cmd.append(args.conf)
+    if args.id:
+        ceph_cmd.append('--id')
+        ceph_cmd.append(args.id)
+    if args.keyring:
+        ceph_cmd.append('--keyring')
+        ceph_cmd.append(args.keyring)
+    ceph_cmd.append('osd')
+    ceph_cmd.append('dump')
+
+    # exec command
+    p = subprocess.Popen(ceph_cmd,stdout=subprocess.PIPE,stderr=subprocess.PIPE)
+    output, err = [stream.decode('utf8', 'replace') for stream in p.communicate()]
+
+    # text on stderr alone is not a failure; go by the exit status instead
+    if p.returncode != 0 or not output:
+        print("OSD ERROR: %s" % err.strip())
+        return STATUS_ERROR
+
+    # escape the resolved address (dots, IPv6 brackets) so that the regular
+    # expressions below match it literally
+    osd_host = re.escape(args.host)
+
+    # collect the OSD names from dump lines of the form
+    #   osd.<id> <state> ... <host>:...
+    def find_osds(state):
+        return re.findall(r"^(osd\.%s) %s.*%s:" % (args.osdid, state, osd_host), output, re.MULTILINE)
+
+    up = find_osds("up")
+    down_in = find_osds("down[ ]+in")
+    down_out = find_osds("down[ ]+out")
+    # with -o/--out every down OSD counts, otherwise only those still marked in
+    down = find_osds("down") if args.out else down_in
+
+    # detail lines and perfdata shared by the problem and the OK report
+    details = [
+        "Up OSDs: " + " ".join(up),
+        "Down+In OSDs: " + " ".join(down_in),
+        "Down+Out OSDs: " + " ".join(down_out),
+        "| 'osd_up'=%d 'osd_down_in'=%d;;%d 'osd_down_out'=%d;;%d" % (len(up), len(down_in), args.crit, len(down_out), args.crit),
+    ]
+
+    if down:
+        critical = len(down) >= args.crit
+        print("OSD %s: Down OSD%s on %s: %s" % ('CRITICAL' if critical else 'WARNING', 's' if len(down) > 1 else '', args.host, " ".join(down)))
+        print("\n".join(details))
+        return STATUS_ERROR if critical else STATUS_WARNING
+
+    if up:
+        print("OSD OK")
+        print("\n".join(details))
+        return STATUS_OK
+
+    print("OSD WARN: no OSD.%s found on host %s" % (args.osdid, args.host))
+    return STATUS_WARNING
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/check_ceph_osd_db b/check_ceph_osd_db
new file mode 100755
index 0000000..de3df92
--- /dev/null
+++ b/check_ceph_osd_db
@@ -0,0 +1,152 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2020 Binero AB https://binero.com
+# Copyright (c) 2013 Catalyst IT http://www.catalyst.net.nz
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import os
+import re
+import subprocess
+import sys
+import socket
+import json
+
+
+CEPH_COMMAND = '/usr/bin/ceph'
+
+STATUS_OK = 0
+STATUS_CRITICAL = 2
+STATUS_UNKNOWN = 3
+
+
+def main():
+    """Nagios check: report the 'bluefs' DB usage percentage of the OSDs that are up on one host."""
+    parser = argparse.ArgumentParser(description="'ceph osd' nagios plugin.")
+    parser.add_argument('-e','--exe', help='ceph executable [%s]' % CEPH_COMMAND)
+    parser.add_argument('-c','--conf', help='alternative ceph conf file')
+    parser.add_argument('-m','--monaddress', help='ceph monitor address[:port]')
+    parser.add_argument('-i','--id', help='ceph client id')
+    parser.add_argument('-k','--keyring', help='ceph client keyring file')
+    parser.add_argument('-H','--host', help='osd host', required=True)
+    parser.add_argument('-C','--critical', help='critical threshold', type=float, default=60)
+
+    args = parser.parse_args()
+
+    ceph_exec = args.exe if args.exe else CEPH_COMMAND
+    if not os.path.exists(ceph_exec):
+        print("UNKNOWN: ceph executable '%s' doesn't exist" % ceph_exec)
+        return STATUS_UNKNOWN
+
+    if args.conf and not os.path.exists(args.conf):
+        print("UNKNOWN: ceph conf file '%s' doesn't exist" % args.conf)
+        return STATUS_UNKNOWN
+
+    if args.keyring and not os.path.exists(args.keyring):
+        print("UNKNOWN: keyring file '%s' doesn't exist" % args.keyring)
+        return STATUS_UNKNOWN
+
+    if not args.host:
+        print("UNKNOWN: no OSD hostname given")
+        return STATUS_UNKNOWN
+
+    try:
+        addrinfo = socket.getaddrinfo(args.host, None, 0, socket.SOCK_STREAM)
+        args.host = addrinfo[0][-1][0]
+        if addrinfo[0][0] == socket.AF_INET6:
+            args.host = "[%s]" % args.host
+    except (socket.error, UnicodeError, IndexError):
+        print('UNKNOWN: could not resolve %s' % args.host)
+        return STATUS_UNKNOWN
+
+    ceph_cmd = [ceph_exec]
+    if args.monaddress:
+        ceph_cmd.append('-m')
+        ceph_cmd.append(args.monaddress)
+    if args.conf:
+        ceph_cmd.append('-c')
+        ceph_cmd.append(args.conf)
+    if args.id:
+        ceph_cmd.append('--id')
+        ceph_cmd.append(args.id)
+    if args.keyring:
+        ceph_cmd.append('--keyring')
+        ceph_cmd.append(args.keyring)
+
+    ceph_cmd.append('osd')
+    ceph_cmd.append('dump')
+
+    p = subprocess.Popen(ceph_cmd,stdout=subprocess.PIPE,stderr=subprocess.PIPE)
+    output, err = [stream.decode('utf8', 'replace') for stream in p.communicate()]
+
+    if p.returncode != 0 or not output:
+        print("CRITICAL: %s" % err.strip())
+        return STATUS_CRITICAL
+
+    # escape the resolved address (dots, IPv6 brackets) so that the regular
+    # expression below matches it literally
+    osd_host = re.escape(args.host)
+
+    # names of the OSDs whose dump line starts with "osd.<id> up" and
+    # contains this host's address followed by ':'
+    osds_up = re.findall(r"^(osd\.[^ ]*) up.*%s:" % (osd_host), output, re.MULTILINE)
+
+    final_status = STATUS_OK
+    lines = []
+
+    for osd in osds_up:
+        # ask the OSD daemon itself for its perf counters, as JSON
+        daemon_ceph_cmd = [ceph_exec, '--format', 'json', 'daemon', osd, 'perf', 'dump']
+        p = subprocess.Popen(daemon_ceph_cmd,stdout=subprocess.PIPE,stderr=subprocess.PIPE)
+        perf_out, perf_err = p.communicate()
+
+        if p.returncode != 0 or not perf_out:
+            print("CRITICAL: %s" % perf_err.decode('utf8', 'replace').strip())
+            return STATUS_CRITICAL
+
+        try:
+            data = json.loads(perf_out.decode('utf8'))
+        except ValueError:
+            print("CRITICAL: failed to load json")
+            return STATUS_CRITICAL
+
+        # OSDs without a 'bluefs' section expose no DB usage: skip them
+        bluefs = data.get('bluefs', None)
+        if not bluefs:
+            continue
+
+        # a missing or zero-sized DB cannot be expressed as a percentage
+        db_total_bytes = float(bluefs.get('db_total_bytes') or 0)
+        db_used_bytes = float(bluefs.get('db_used_bytes') or 0)
+        if db_total_bytes <= 0:
+            continue
+
+        perc = db_used_bytes / db_total_bytes * 100
+        if perc >= args.critical:
+            final_status = STATUS_CRITICAL
+
+        lines.append("%s=%.2f%%" % (osd, perc))
+
+    if final_status == STATUS_OK:
+        print("OK: %s" % (' '.join(lines)))
+    else:
+        print("CRITICAL: %s" % (' '.join(lines)))
+
+    return final_status
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/check_ceph_osd_df b/check_ceph_osd_df
new file mode 100755
index 0000000..0c91dc2
--- /dev/null
+++ b/check_ceph_osd_df
@@ -0,0 +1,153 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+#
+# check_ceph_osd_df - Check OSD DF output
+# Copyright (c) 2020 noris network AG https://www.noris.de
+#
+# This plugin will not output perfdata as there is likely a lot of output
+# which should be gathered using other tools.
+#
+# Parts based on code from check_ceph_df which is
+# Copyright (c) 2013 SWITCH http://www.switch.ch
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+from __future__ import print_function
+import argparse
+import os
+import subprocess
+import sys
+import json
+from operator import itemgetter
+
+# Semver
+__version__ = '1.0.0'
+
+# default ceph values
+CEPH_COMMAND = '/usr/bin/ceph'
+
+# nagios exit code
+STATUS_OK = 0
+STATUS_WARNING = 1
+STATUS_ERROR = 2
+STATUS_UNKNOWN = 3
+
+def main():
+    """Nagios check: compare per-OSD utilization from 'ceph osd df' with warn/critical percentages."""
+    # parse args
+    parser = argparse.ArgumentParser(description="'ceph osd df' nagios plugin.")
+    parser.add_argument('-e','--exe', help='ceph executable [%s]' % CEPH_COMMAND)
+    parser.add_argument('-c','--conf', help='alternative ceph conf file')
+    parser.add_argument('-m','--monaddress', help='ceph monitor address[:port]')
+    parser.add_argument('-i','--id', help='ceph client id')
+    parser.add_argument('-n','--name', help='ceph client name')
+    parser.add_argument('-k','--keyring', help='ceph client keyring file')
+    parser.add_argument('-W','--warn', help="warn above this percent USED", type=float)
+    parser.add_argument('-C','--critical', help="critical alert above this percent USED", type=float)
+    parser.add_argument('-V','--version', help='show version and exit', action='store_true')
+    args = parser.parse_args()
+
+    # validate args (version first, so it works without a ceph executable)
+    if args.version:
+        print('version %s' % __version__)
+        return STATUS_OK
+
+    ceph_exec = args.exe if args.exe else CEPH_COMMAND
+    if not os.path.exists(ceph_exec):
+        print("ERROR: ceph executable '%s' doesn't exist" % ceph_exec)
+        return STATUS_UNKNOWN
+
+    if args.conf and not os.path.exists(args.conf):
+        print("ERROR: ceph conf file '%s' doesn't exist" % args.conf)
+        return STATUS_UNKNOWN
+
+    if args.keyring and not os.path.exists(args.keyring):
+        print("ERROR: keyring file '%s' doesn't exist" % args.keyring)
+        return STATUS_UNKNOWN
+
+    if args.warn is None or args.critical is None or args.warn > args.critical:
+        print("ERROR: warn and critical level must be set and critical must be greater than warn")
+        return STATUS_UNKNOWN
+
+    # build command
+    ceph_osd_df = [ceph_exec]
+    if args.monaddress:
+        ceph_osd_df.append('-m')
+        ceph_osd_df.append(args.monaddress)
+    if args.conf:
+        ceph_osd_df.append('-c')
+        ceph_osd_df.append(args.conf)
+    if args.id:
+        ceph_osd_df.append('--id')
+        ceph_osd_df.append(args.id)
+    if args.name:
+        ceph_osd_df.append('--name')
+        ceph_osd_df.append(args.name)
+    if args.keyring:
+        ceph_osd_df.append('--keyring')
+        ceph_osd_df.append(args.keyring)
+    ceph_osd_df.append('osd')
+    ceph_osd_df.append('df')
+    ceph_osd_df.append('--format=json')
+
+    # exec command
+    p = subprocess.Popen(ceph_osd_df,stdout=subprocess.PIPE,stderr=subprocess.PIPE)
+    output, err = [stream.decode('utf8', 'replace') for stream in p.communicate()]
+
+    # parse output: stdout carries the JSON report; stderr is only looked at
+    # when stdout is empty, and then just its first line is reported
+    # (both were decoded above so the str operations below work on Python 3)
+    if output:
+        try:
+            result = json.loads(output)
+            nodes_sorted = sorted(result["nodes"], key=itemgetter('utilization','id'))
+
+            # list every OSD at or above the warn level exactly once, most
+            # utilized first; a single OSD at or above the critical level
+            # makes the whole result critical
+            check_return_value = STATUS_OK
+            warn_crit_osds = []
+            for node in reversed(nodes_sorted):
+                if node["utilization"] >= args.warn:
+                    warn_crit_osds.append("{}={:04.2f}".format(node["name"], node["utilization"]))
+                    if check_return_value == STATUS_OK:
+                        check_return_value = STATUS_WARNING
+                if node["utilization"] >= args.critical:
+                    check_return_value = STATUS_ERROR
+
+            if check_return_value == STATUS_OK:
+                print("OK: All OSDs within limits")
+                return STATUS_OK
+            elif check_return_value == STATUS_WARNING:
+                print("WARNING: OSD usage above warn threshold: {:.4054}".format(", ".join(warn_crit_osds)))
+                return STATUS_WARNING
+            elif check_return_value == STATUS_ERROR:
+                print("CRITICAL: OSD usage above critical or warn threshold: {:.4041}".format(", ".join(warn_crit_osds)))
+                return STATUS_ERROR
+        except (ValueError, KeyError, TypeError):
+            print("ERROR: {}".format(sys.exc_info()[0]))
+            return STATUS_UNKNOWN
+    elif err:
+        # report only the first line of the error; when it contains '-1 ',
+        # keep just the text after its last occurrence
+        one_line = err.split('\n')[0]
+        idx = one_line.rfind('-1 ')
+        print('ERROR: %s: %s' % (ceph_exec, one_line[idx+len('-1 '):]) if idx >= 0 else one_line)
+    else:
+        print("ERROR: no output from '%s'" % ceph_exec)
+
+    return STATUS_UNKNOWN
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/check_ceph_rgw b/check_ceph_rgw
new file mode 100755
index 0000000..b0af8a7
--- /dev/null
+++ b/check_ceph_rgw
@@ -0,0 +1,118 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2014 Catalyst IT http://www.catalyst.net.nz
+# Copyright (c) 2015 SWITCH http://www.switch.ch
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+from __future__ import print_function
+import argparse
+import os
+import re
+import subprocess
+import sys
+import json
+
+__version__ = '1.5.1'
+
+# default ceph values
+RGW_COMMAND = '/usr/bin/radosgw-admin'
+
+# nagios exit code
+STATUS_OK = 0
+STATUS_WARNING = 1
+STATUS_ERROR = 2
+STATUS_UNKNOWN = 3
+
+def main():
+    """Nagios check: report bucket count and usage from 'radosgw-admin bucket stats'."""
+    # parse args
+    parser = argparse.ArgumentParser(description="'radosgw-admin bucket stats' nagios plugin.")
+    parser.add_argument('-d','--detail', help='output perf data for all buckets', action='store_true')
+    parser.add_argument('-B','--byte', help='output perf data in Byte instead of KB', action='store_true')
+    parser.add_argument('-e','--exe', help='radosgw-admin executable [%s]' % RGW_COMMAND)
+    parser.add_argument('-c','--conf', help='alternative ceph conf file')
+    parser.add_argument('-i','--id', help='ceph client id')
+    parser.add_argument('-n','--name', help='ceph client name (type.id)')
+    parser.add_argument('-V','--version', help='show version and exit', action='store_true')
+    args = parser.parse_args()
+
+    # validate args (version first, so it works without radosgw-admin)
+    if args.version:
+        print('version %s' % __version__)
+        return STATUS_OK
+
+    rgw_exec = args.exe if args.exe else RGW_COMMAND
+    if not os.path.exists(rgw_exec):
+        print("RGW ERROR: radosgw-admin executable '%s' doesn't exist" % rgw_exec)
+        return STATUS_UNKNOWN
+
+    if args.conf and not os.path.exists(args.conf):
+        print("RGW ERROR: ceph conf file '%s' doesn't exist" % args.conf)
+        return STATUS_UNKNOWN
+
+    # build command
+    rgw_cmd = [rgw_exec]
+    if args.conf:
+        rgw_cmd.append('-c')
+        rgw_cmd.append(args.conf)
+    if args.id:
+        rgw_cmd.append('--id')
+        rgw_cmd.append(args.id)
+    if args.name:
+        rgw_cmd.append('-n')
+        rgw_cmd.append(args.name)
+    rgw_cmd.append('bucket')
+    rgw_cmd.append('stats')
+
+    # exec command
+    p = subprocess.Popen(rgw_cmd,stdout=subprocess.PIPE,stderr=subprocess.PIPE)
+    output, err = [stream.decode('utf8', 'replace') for stream in p.communicate()]
+
+    if p.returncode != 0 or not output:
+        print("RGW ERROR: %s :: %s" % (output, err))
+        return STATUS_ERROR
+
+    # an answer that is not JSON is reported instead of ending in a traceback
+    try:
+        bucket_stats = json.loads(output)
+    except ValueError as e:
+        print("RGW ERROR: could not parse 'bucket stats' output: %s" % e)
+        return STATUS_UNKNOWN
+
+    # (bucket name, KB used) per bucket; buckets without 'rgw.main' usage count as 0
+    buckets = []
+    for entry in bucket_stats:
+        if isinstance(entry, dict):
+            usage = entry['usage'] or {}
+            kb = usage['rgw.main']['size_kb_actual'] if 'rgw.main' in usage else 0
+            buckets.append((entry['bucket'], kb))
+    buckets_total_kb = sum(b[1] for b in buckets)
+
+    # perfdata is reported in KB unless -B/--byte asks for bytes
+    unit, factor = ("B", 1024) if args.byte else ("KB", 1)
+    status = "RGW OK: {} buckets, {} KB total | /={}{} ".format(
+        len(buckets), buckets_total_kb, buckets_total_kb * factor, unit)
+
+    # -d/--detail appends one perfdata item per bucket
+    if buckets and args.detail:
+        status += " ".join(
+            "{}={}{}".format(name, kb * factor, unit) for name, kb in buckets)
+
+    print(status)
+    return STATUS_OK
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/check_ceph_rgw_api b/check_ceph_rgw_api
new file mode 100755
index 0000000..02d2376
--- /dev/null
+++ b/check_ceph_rgw_api
@@ -0,0 +1,116 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+#
+# Copyright (c) 2014 Catalyst IT http://www.catalyst.net.nz
+# Copyright (c) 2015 SWITCH http://www.switch.ch
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from __future__ import print_function
+import requests
+import warnings
+import json
+import argparse
+import sys
+from awsauth import S3Auth
+
+__version__ = '1.7.2'
+
+# nagios exit code
+STATUS_OK = 0
+STATUS_WARNING = 1
+STATUS_CRITICAL = 2
+STATUS_UNKNOWN = 3
+
+def main():
+    """Nagios check: report bucket count and usage read from the radosgw admin API."""
+    # parse args
+    parser = argparse.ArgumentParser(description="'radosgw api bucket stats' nagios plugin.")
+    parser.add_argument('-H', '--host', help="Server URL for the radosgw api (example: http://objects.dreamhost.com/)", required=True)
+    parser.add_argument('-k', '--insecure', help="Allow insecure server connections when using SSL", action="store_false")
+    parser.add_argument('-e', '--admin_entry', help="The entry point for an admin request URL [default is '%(default)s']", default="admin")
+    parser.add_argument('-a', '--access_key', help="S3 access key", required=True)
+    parser.add_argument('-s', '--secret_key', help="S3 secret key", required=True)
+    parser.add_argument('-d', '--detail', help="output perf data for all buckets", action="store_true")
+    parser.add_argument('-b', '--byte', help="output perf data in Byte instead of KB", action="store_true")
+    parser.add_argument('-v', '--version', help='show version and exit', action="store_true")
+    args = parser.parse_args()
+
+    if args.version:
+        print("version {0}".format(__version__))
+        return STATUS_OK
+
+    # default to plain http only when the host carries no explicit scheme
+    if not args.host.startswith(("http://", "https://")):
+        args.host = "http://{0}".format(args.host)
+    # and for request_uri
+    if not args.host.endswith("/"):
+        args.host = "{0}/".format(args.host)
+
+    url = "{0}{1}/bucket?format=json&stats=True".format(args.host,
+                                                        args.admin_entry)
+
+    try:
+        # '--insecure' is a store_false flag: args.insecure is True by default
+        # (verify certificates) and False once the option is given
+        if not args.insecure:
+            warnings.filterwarnings('ignore', message='Unverified HTTPS request')
+
+        response = requests.get(url, verify=args.insecure,
+                                auth=S3Auth(args.access_key, args.secret_key,
+                                            args.host))
+
+        if response.status_code != requests.codes.ok:
+            # no usage caps or wrong admin entry
+            print("RGW ERROR [{0}]: {1}".format(response.status_code,
+                                                response.content.decode('utf-8')))
+            return STATUS_WARNING
+
+        bucket_stats = response.json()
+
+    # DNS, connection errors, etc. and response bodies that are not valid JSON
+    except (requests.exceptions.RequestException, ValueError) as e:
+        print("RGW ERROR: {0}".format(e))
+        return STATUS_UNKNOWN
+
+    # (bucket name, KB used) per bucket; buckets without 'rgw.main' usage count as 0
+    buckets = []
+    for entry in bucket_stats:
+        if not isinstance(entry, dict):
+            continue
+        usage = entry['usage']
+        if usage and 'rgw.main' in usage:
+            kb = usage['rgw.main']['size_kb_actual']
+        else:
+            kb = 0
+        buckets.append((entry['bucket'], kb))
+    buckets_total_kb = sum(b[1] for b in buckets)
+
+    status = "RGW OK: {0} buckets, {1} KB total | /={2}{3} "
+
+    # perfdata is reported in KB unless -b/--byte asks for bytes
+    unit, factor = ("B", 1024) if args.byte else ("KB", 1)
+    status = status.format(len(buckets), buckets_total_kb,
+                           buckets_total_kb * factor, unit)
+
+    # -d/--detail appends one perfdata item per bucket
+    if buckets and args.detail:
+        status += " ".join(
+            "{}={}{}".format(name, kb * factor, unit)
+            for name, kb in buckets)
+
+    print(status)
+    return STATUS_OK
+
+if __name__ == "__main__":
+    sys.exit(main())