-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
204 lines (150 loc) · 9.33 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
#! /usr/bin/python3
# Simple flask app which listens for a json backup report payload and exposes basic metrics to prometheus
from flask import Flask, request
from flask_apscheduler import APScheduler
from werkzeug.middleware.dispatcher import DispatcherMiddleware
from prometheus_client import make_wsgi_app, Counter, Gauge, Summary
import duplicati_client
import os
import datetime
app = Flask(__name__)
scheduler = APScheduler()

# Possible values of 'ParsedResult' in a Duplicati report payload.
STATE_ERROR = "Error"
STATE_WARNING = "Warning"
STATE_SUCCESS = "Success"
STATE_FATAL = "Fatal"
RESULT_STATES = [STATE_ERROR, STATE_WARNING, STATE_SUCCESS, STATE_FATAL]

# How long (seconds) a backup still counts as "recent" for the recent gauge.
RECENT_BACKUP_AGE_SEC = 30
# How often (seconds) the scheduled maintenance job prunes old entries.
SCHEDULED_MAINT_INTERVAL_SEC = 1

# For tracking recent backups:
# backup name -> {result state -> [UTC datetimes of recent backups]}
recent_backups = {}
# backup name -> last computed success ratio (0.0 - 1.0)
success_percent = {}

# Add prometheus wsgi middleware to route /metrics requests
app.wsgi_app = DispatcherMiddleware(app.wsgi_app, {
    '/metrics': make_wsgi_app()
})

result_counter = Counter('duplicati_backup_result_count', 'Count of backups that have ran', ['backup', 'result'])
result_recent_gauge = Gauge('duplicati_backup_result_recent_gauge', "Count of backups that occurred in the last {0}s. Resets to 0 if no recent backups".format(RECENT_BACKUP_AGE_SEC), ['backup', 'result'])
# Fixed: the help text here had a no-op .format(RECENT_BACKUP_AGE_SEC) call with
# no placeholder in the string; the resulting help string is unchanged.
result_last_success_percent_gauge = Gauge('duplicati_backup_result_last_success_percent_gauge', "Percentage of Success vs non-Success for the last known backup", ['backup'])
files_gauge = Gauge('duplicati_files', 'Number of added files', ['backup', 'operation'])
files_size_gauge = Gauge('duplicati_files_size', 'Size of added files', ['backup', 'operation'])
duration_summary = Summary('duplicati_backup_duration', 'How long the backup operation was running for', ['backup', 'result'])
def get_json_value(obj, key, default=None):
    """Return obj[key] if obj is a mapping containing key, else default.

    Also tolerates obj being None (e.g. when the 'Extra' or 'Data' section is
    absent from the payload); the previous version raised a TypeError on the
    'in' test in that case, turning a malformed request into a 500.
    """
    if obj is not None and key in obj:
        return obj[key]
    return default
def determine_success_percent(backup_name):
    """Calculates the success percentage (0.0 to 1.0) for a given backup.
    If there's no known backup in the last RECENT_BACKUP_AGE_SEC seconds, the last known success percentage is used"""
    success_percent.setdefault(backup_name, 1)
    if backup_name not in recent_backups:
        return 0  # Bad request, return 0
    states = recent_backups[backup_name]
    # Totals over every state bucket of this backup
    total = sum(len(times) for times in states.values())
    if total == 0:
        # No backups - return the last value
        return success_percent[backup_name]
    successes = len(states.get(STATE_SUCCESS, []))
    ratio = successes / total
    success_percent[backup_name] = ratio
    return ratio
def init_gauge_callbacks(backup_name, state):
    """Register set_function callbacks for the gauges of a backup/state pair,
    creating the recent_backups buckets the callbacks read from."""
    print("Defining gauge for backup: {0} and state: {1}".format(backup_name, state))
    first_sighting = backup_name not in recent_backups
    if first_sighting:
        recent_backups[backup_name] = {}
        # result_last_success_percent_gauge is at the 'backup' level
        result_last_success_percent_gauge.labels(backup=backup_name).set_function(lambda: determine_success_percent(backup_name))
    if state not in recent_backups[backup_name]:
        recent_backups[backup_name][state] = []
        result_recent_gauge.labels(backup=backup_name, result=state).set_function(lambda: len(recent_backups[backup_name][state]))
def pre_seed_metrics(backup_name):
    """Pre-create the metric series for every result state of a backup.

    Just seeding results for now; other metrics can be added later if there's
    value."""
    for state in RESULT_STATES:
        # inc(0) forces the labelled counter series into existence
        result_counter.labels(backup=backup_name, result=state).inc(0)
        init_gauge_callbacks(backup_name, state)
def maintain_recent_backups():
    """Called periodically to drop entries older than RECENT_BACKUP_AGE_SEC
    from the recent_backups dict.

    Fixes two issues in the previous version: it called list.remove() while
    iterating the same list (which skips elements, so some stale timestamps
    survived a pass), and it recomputed utcnow() for every timestamp.
    """
    cutoff = datetime.datetime.utcnow() - datetime.timedelta(seconds=RECENT_BACKUP_AGE_SEC)
    for states in recent_backups.values():
        for backup_times in states.values():
            # Slice-assign to keep the same list object alive, since the
            # recent-gauge callbacks read these lists.
            backup_times[:] = [t for t in backup_times if t >= cutoff]
def determine_duration_sec(duration_str):
    """Convert duration from string (ex: "00:00:04.4392890") to seconds.

    Returns None if duration_str is None or cannot be parsed. Unlike the
    previous version, a duration with no fractional part ("00:00:04") no
    longer raises an unhandled ValueError from the split-unpack, and the bare
    'except:' has been narrowed so KeyboardInterrupt/SystemExit pass through.
    """
    date_format = "%H:%M:%S.%f"
    if duration_str is None:
        return None
    base, _, ms = duration_str.partition(".")
    # %f doesn't support more than 6 digits and we don't care about that
    # granularity, so truncate; default to "0" when no fraction was given.
    ms = ms[:6] or "0"
    duration_str = "{0}.{1}".format(base, ms)
    try:
        duration_time = datetime.datetime.strptime(duration_str, date_format)
    except ValueError:
        print("ERROR: Unrecognized format for Duration. Got: {0}, expected format: {1}".format(duration_str, date_format))
        return None
    except Exception:
        print("ERROR: Caught unknown exception while processing duration: {0}".format(duration_str))
        return None
    # strptime without a date yields 1900-01-01; subtract it to get the span
    delta = duration_time - datetime.datetime(1900, 1, 1)
    return delta.total_seconds()
@app.route('/', methods=['POST'])
def main():
    """Accept a Duplicati JSON report payload and record it in the metrics.

    Returns ('processed', 200) on success, or a 400 when neither a backup
    name nor a parsed result can be found in the payload.
    """
    print("Received: {0}".format(request.method))
    print("JSON: {0}".format(request.json))
    # Main sections. Default each to {} so a payload missing 'Extra' or
    # 'Data' yields a clean 400 below instead of a TypeError/500.
    payload = request.json or {}
    extra = get_json_value(payload, 'Extra') or {}
    data = get_json_value(payload, 'Data') or {}
    # Extract values
    backup_name = get_json_value(extra, 'backup-name')
    result = get_json_value(data, 'ParsedResult')
    duration_sec = determine_duration_sec(get_json_value(data, 'Duration'))
    # Minimum required fields
    if backup_name is None and result is None:
        print("Invalid json. No backup name found")
        return "Invalid json. No backup name found", 400
    if backup_name is not None and result is None:
        # We may have received an exception report (e.g. a missing source
        # folder): 'Data' then carries the serialized exception (ClassName,
        # Message, StackTraceString, ...) instead of a backup summary.
        print("We probably caught an exception. Marking this as 'Fatal' status")
        result = STATE_FATAL
    # Save the values...
    result_counter.labels(backup=backup_name, result=result).inc()
    # File-count gauges, one per operation type reported by Duplicati.
    for operation, key in (('added', 'AddedFiles'),
                           ('deleted', 'DeletedFiles'),
                           ('modified', 'ModifiedFiles'),
                           ('examined', 'ExaminedFiles'),
                           ('opened', 'OpenedFiles')):
        files_gauge.labels(backup=backup_name, operation=operation).set(int(get_json_value(data, key, default=0)))
    # File-size gauges (Duplicati reports no size for deletions).
    for operation, key in (('added', 'SizeOfAddedFiles'),
                           ('modified', 'SizeOfModifiedFiles'),
                           ('examined', 'SizeOfExaminedFiles'),
                           ('opened', 'SizeOfOpenedFiles')):
        files_size_gauge.labels(backup=backup_name, operation=operation).set(int(get_json_value(data, key, default=0)))
    # Backup duration
    if duration_sec is not None:
        duration_summary.labels(backup=backup_name, result=result).observe(duration_sec)
    init_gauge_callbacks(backup_name, result)
    recent_backups[backup_name][result].append(datetime.datetime.utcnow())
    return 'processed', 200
if __name__ == '__main__':
    print("Init...")
    # Optional: when DUPLICATI_URL points at a Duplicati server, pre-create
    # the metric series for every known backup before the first report lands.
    duplicati_url = os.getenv("DUPLICATI_URL", None)
    if duplicati_url is not None:
        print("Will attempt to get backup list from Duplicati for pre-seeding metrics...")
        duplicati = duplicati_client.Duplicati(duplicati_url)
        for backup in duplicati.get_backup_names():
            print("Found backup {0}. Pre-seeding metrics...".format(backup))
            pre_seed_metrics(backup)
    else:
        print("DUPLICATI_URL is not set. Will skip pre-seeding metrics for each backup")
    print("Adding scheduler to call maintain_recent_backups() every {0} second(s)".format(SCHEDULED_MAINT_INTERVAL_SEC))
    # Background interval job that prunes stale entries from recent_backups.
    scheduler.add_job(id = 'Scheduled: Manage Recent Backups', func=maintain_recent_backups, trigger="interval", seconds=SCHEDULED_MAINT_INTERVAL_SEC)
    scheduler.start()
    print("Init complete. Running flask...")
    # NOTE(review): debug=True enables the werkzeug reloader, which re-imports
    # the module and can start the scheduler job twice - confirm this is only
    # used in development.
    app.run(debug=True, host='0.0.0.0', port=9090)