From 90bdf7dbe3bc5d6af32714825c7110e78b26e69e Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Mon, 9 Mar 2020 14:39:55 +0200 Subject: [PATCH 01/40] move datadog init to AppConfig.ready --- corehq/util/datadog/apps.py | 27 +++++++++++++++++++++++++++ settings.py | 14 +------------- 2 files changed, 28 insertions(+), 13 deletions(-) create mode 100644 corehq/util/datadog/apps.py diff --git a/corehq/util/datadog/apps.py b/corehq/util/datadog/apps.py new file mode 100644 index 000000000000..032d76e978d5 --- /dev/null +++ b/corehq/util/datadog/apps.py @@ -0,0 +1,27 @@ +from django.apps import AppConfig +from django.conf import settings + + +class DatadogConfig(AppConfig): + + name = 'corehq.util.datadog' + verbose_name = 'Datadog' + + def ready(self): + if not settings.DATADOG_API_KEY or not settings.DATADOG_APP_KEY: + return + + try: + from datadog import initialize + except ImportError: + pass + else: + initialize(settings.DATADOG_API_KEY, settings.DATADOG_APP_KEY) + + if settings.UNIT_TESTING or settings.DEBUG or 'ddtrace.contrib.django' not in settings.INSTALLED_APPS: + try: + from ddtrace import tracer + tracer.enabled = False + except ImportError: + pass + diff --git a/settings.py b/settings.py index eaa6db1f5837..f4939ce9adcd 100755 --- a/settings.py +++ b/settings.py @@ -340,6 +340,7 @@ 'corehq.motech.openmrs', 'corehq.motech.repeaters', 'corehq.util', + 'corehq.util.datadog.apps.DatadogConfig' 'dimagi.ext', 'corehq.blobs', 'corehq.apps.case_search', @@ -2073,19 +2074,6 @@ if 'dummy' not in CACHES: CACHES['dummy'] = {'BACKEND': 'django.core.cache.backends.dummy.DummyCache'} -try: - from datadog import initialize -except ImportError: - pass -else: - initialize(DATADOG_API_KEY, DATADOG_APP_KEY) - -if UNIT_TESTING or DEBUG or 'ddtrace.contrib.django' not in INSTALLED_APPS: - try: - from ddtrace import tracer - tracer.enabled = False - except ImportError: - pass REST_FRAMEWORK = { 'DATETIME_FORMAT': '%Y-%m-%dT%H:%M:%S.%fZ', From 7ad93fa2d6aeee3b0d337ae28eb5e0f83516e3e1 Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Tue, 10 Mar 2020 12:33:08 +0200 Subject: [PATCH 02/40] pluggable metrics --- corehq/util/metrics/__init__.py | 19 ++++ corehq/util/metrics/datadog.py | 49 +++++++++++ corehq/util/metrics/metrics.py | 139 ++++++++++++++++++++++++++++++ corehq/util/metrics/prometheus.py | 46 ++++++++++ settings.py | 3 +- 5 files changed, 255 insertions(+), 1 deletion(-) create mode 100644 corehq/util/metrics/__init__.py create mode 100644 corehq/util/metrics/datadog.py create mode 100644 corehq/util/metrics/metrics.py create mode 100644 corehq/util/metrics/prometheus.py diff --git a/corehq/util/metrics/__init__.py b/corehq/util/metrics/__init__.py new file mode 100644 index 000000000000..7672abf5b545 --- /dev/null +++ b/corehq/util/metrics/__init__.py @@ -0,0 +1,19 @@ +from corehq.util.metrics.datadog import DatadogMetrics +from corehq.util.metrics.metrics import DummyMetrics, DelegatedMetrics, HqMetrics +from corehq.util.metrics.prometheus import PrometheusMetrics + +_metrics = None # singleton/global + + +def get_metrics() -> HqMetrics: + global _metrics + if not _metrics: + enabled = list(filter(lambda m: m.enabled(), [DatadogMetrics(), PrometheusMetrics()])) + if not enabled: + _metrics = DummyMetrics() + + if len(enabled) > 1: + _metrics = DelegatedMetrics(enabled) + + _metrics = enabled[0] + return _metrics diff --git a/corehq/util/metrics/datadog.py b/corehq/util/metrics/datadog.py new file mode 100644 index 000000000000..a0598168e9b1 --- /dev/null +++ b/corehq/util/metrics/datadog.py @@ 
-0,0 +1,49 @@ +import logging + +from django.conf import settings + +from corehq.util.metrics.metrics import HqCounter, HqGauge, HqMetrics +from datadog import api +from datadog.dogstatsd.base import DogStatsd + +datadog_logger = logging.getLogger('datadog') + +COMMON_TAGS = ['environment:{}'.format(settings.SERVER_ENVIRONMENT)] + +statsd = DogStatsd(constant_tags=COMMON_TAGS) + + +def _format_tags(tag_names, tag_values): + if not tag_names or not tag_values: + return None + + return [ + f'{name}:{value}' for name, value in zip(tag_names, tag_values) + ] + + +def _datadog_record(fn, name, value, tags=None): + try: + fn(name, value, tags=tags) + except Exception: + datadog_logger.exception('Unable to record Datadog stats') + + +class Counter(HqCounter): + def _inc(self, amount=1): + tags = _format_tags(self.tag_names, self.tag_values) + _datadog_record(statsd.increment, self.name, amount, tags) + + +class Gauge(HqGauge): + def _set(self, value): + tags = _format_tags(self.tag_names, self.tag_values) + _datadog_record(statsd.gauge, self.name, value, tags) + + +class DatadogMetrics(HqMetrics): + _counter_class = Counter + _gauge_class = Gauge + + def enabled(self) -> bool: + return api._api_key and api._application_key diff --git a/corehq/util/metrics/metrics.py b/corehq/util/metrics/metrics.py new file mode 100644 index 000000000000..9b52266bee92 --- /dev/null +++ b/corehq/util/metrics/metrics.py @@ -0,0 +1,139 @@ +import abc +import re +from abc import abstractmethod +from typing import Iterable + +from corehq.util.soft_assert import soft_assert + +METRIC_NAME_RE = re.compile(r'^[a-zA-Z_:.][a-zA-Z0-9_:.]*$') +METRIC_TAG_NAME_RE = re.compile(r'^[a-zA-Z_][a-zA-Z0-9_]*$') +RESERVED_METRIC_TAG_NAME_RE = re.compile(r'^__.*$') +RESERVED_METRIC_TAG_NAMES = ['quantile', 'le'] + + +def _enforce_prefix(name, prefix): + soft_assert(fail_if_debug=True).call( + not prefix or name.startswith(prefix), + "Did you mean to call your metric 'commcare.{}'? 
".format(name)) + + +def _validate_tag_names(tag_names): + tag_names = tuple(tag_names) + for l in tag_names: + if not METRIC_TAG_NAME_RE.match(l): + raise ValueError('Invalid metric tag name: ' + l) + if RESERVED_METRIC_TAG_NAME_RE.match(l): + raise ValueError('Reserved metric tag name: ' + l) + if l in RESERVED_METRIC_TAG_NAMES: + raise ValueError('Reserved metric tag name: ' + l) + return tag_names + + +class MetricBase: + def __init__(self, name: str, documentation: str, tag_names: Iterable=tuple(), tag_values=None): + self.name = name + if not METRIC_NAME_RE.match(name): + raise ValueError('Invalid metric name: ' + name) + _enforce_prefix(name, 'commcare') + self.documentation = documentation + self.tag_names = _validate_tag_names(tag_names) + self.tag_values = tag_values + + def tag(self, **tag_kwargs): + if sorted(tag_kwargs) != sorted(self.tag_names): + raise ValueError('Incorrect tag names') + + tag_values = tuple(str(tag_kwargs[t]) for t in self.tag_names) + return self._get_tagged_instance(tag_values) + + def _get_tagged_instance(self, tag_values): + return self.__class__(self.name, self.documentation, self.tag_names, tag_values) + + def _validate_tags(self): + if self.tag_names and not self.tag_values: + raise Exception('Metric has missing tag values.') + + +class HqCounter(MetricBase): + def inc(self, amount: float = 1): + """Increment the counter by the given amount.""" + self._validate_tags() + self._inc(amount) + + def _inc(self, amount: float = 1): + """Override this method to record the metric""" + pass + + +class HqGauge(MetricBase): + def set(self, value: float): + """Set gauge to the given value.""" + self._validate_tags() + self._set(value) + + def _set(self, value: float): + """Override this method to record the metric""" + pass + + +class HqMetrics(metaclass=abc.ABCMeta): + _counter_class = None + _gauge_class = None + + @abstractmethod + def enabled(self) -> bool: + raise NotImplementedError + + def counter(self, name: str, documentation: str, tag_names: Iterable=tuple()) -> HqCounter: + return self._counter_class(name, documentation, tag_names) + + def gauge(self, name: str, documentation: str, tag_names: Iterable=tuple()) -> HqGauge: + return self._gauge_class(name, documentation, tag_names) + + +class DummyMetrics(HqMetrics): + _counter_class = HqCounter + _gauge_class = HqGauge + + def enabled(self) -> bool: + return True + + +class DelegatedMetrics(HqMetrics): + def __init__(self, delegates): + self.delegates = delegates + + def enabled(self) -> bool: + return True + + def counter(self, name: str, documentation: str, tag_names: Iterable=tuple()): + return DelegatingCounter([ + d.counter(name, documentation, tag_names) for d in self.delegates + ]) + + def gauge(self, name: str, documentation: str, tag_names: Iterable=tuple()): + return DelegatingGauge([ + d.gauge(name, documentation, tag_names) for d in self.delegates + ]) + + +class DelegatingMetric: + def __init__(self, delegates): + self._delegates = delegates + + def tag(self, **tag_kwargs): + return self.__class__([ + d.tag(**tag_kwargs) for d in self._delegates + ]) + + +class DelegatingCounter(DelegatingMetric): + def inc(self, amount: float = 1): + for d in self._delegates: + d.inc(amount) + + +class DelegatingGauge(DelegatingMetric): + def set(self, value: float): + for d in self._delegates: + d.set(value) diff --git a/corehq/util/metrics/prometheus.py b/corehq/util/metrics/prometheus.py new file mode 100644 index 000000000000..6637f7097520 --- /dev/null +++ b/corehq/util/metrics/prometheus.py @@ -0,0 
+1,46 @@ +from typing import Iterable + +import settings +from corehq.util.metrics.metrics import ( + HqCounter, + HqGauge, + HqMetrics, + MetricBase, +) +from prometheus_client import Counter as PCounter +from prometheus_client import Gauge as PGauge + + +class PrometheusMetricBase(MetricBase): + _metric_class = None + + def __init__(self, name: str, documentation: str, tag_names: Iterable=tuple(), tag_values=None, delegate=None): + name = name.replace('.', '_') + super().__init__(name, documentation, tag_names, tag_values) + self._delegate = delegate or self._metric_class(name, documentation, tag_names, labelvalues=tag_values) + + def _get_tagged_instance(self, tag_values): + delegate = self._delegate.labels(dict(zip(self.tag_names, tag_values))) + return self.__class__(self.name, self.documentation, self.tag_names, self.tag_values, delegate) + + +class Counter(PrometheusMetricBase, HqCounter): + _metric_class = PCounter + + def _inc(self, amount=1): + self._delegate.inc(amount) + + +class Gauge(PrometheusMetricBase, HqGauge): + _metric_class = PGauge + + def _set(self, value: float): + self._delegate.set(value) + + +class PrometheusMetrics(HqMetrics): + _counter_class = Counter + _gauge_class = Gauge + + def enabled(self) -> bool: + return settings.ENABLE_PROMETHEUS_METRICS diff --git a/settings.py b/settings.py index f4939ce9adcd..05431414ab78 100755 --- a/settings.py +++ b/settings.py @@ -340,7 +340,7 @@ 'corehq.motech.openmrs', 'corehq.motech.repeaters', 'corehq.util', - 'corehq.util.datadog.apps.DatadogConfig' + 'corehq.util.datadog.apps.DatadogConfig', 'dimagi.ext', 'corehq.blobs', 'corehq.apps.case_search', @@ -838,6 +838,7 @@ DATADOG_API_KEY = None DATADOG_APP_KEY = None +ENABLE_PROMETHEUS_METRICS = False SYNCLOGS_SQL_DB_ALIAS = 'default' From 0694926fc68420d9ef09b8d5a29f315abd3e8266 Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Tue, 10 Mar 2020 17:41:01 +0200 Subject: [PATCH 03/40] add histogram and tests --- corehq/util/metrics/datadog.py | 40 ++++- corehq/util/metrics/metrics.py | 58 ++++++-- corehq/util/metrics/prometheus.py | 43 ++++-- corehq/util/metrics/tests/__init__.py | 0 corehq/util/metrics/tests/test_metrics.py | 171 ++++++++++++++++++++++ corehq/util/metrics/tests/utils.py | 22 +++ 6 files changed, 308 insertions(+), 26 deletions(-) create mode 100644 corehq/util/metrics/tests/__init__.py create mode 100644 corehq/util/metrics/tests/test_metrics.py create mode 100644 corehq/util/metrics/tests/utils.py diff --git a/corehq/util/metrics/datadog.py b/corehq/util/metrics/datadog.py index a0598168e9b1..748718c3f23c 100644 --- a/corehq/util/metrics/datadog.py +++ b/corehq/util/metrics/datadog.py @@ -2,7 +2,13 @@ from django.conf import settings -from corehq.util.metrics.metrics import HqCounter, HqGauge, HqMetrics +from corehq.util.datadog.utils import bucket_value +from corehq.util.metrics.metrics import ( + HqCounter, + HqGauge, + HqHistogram, + HqMetrics, +) from datadog import api from datadog.dogstatsd.base import DogStatsd @@ -30,20 +36,48 @@ def _datadog_record(fn, name, value, tags=None): class Counter(HqCounter): - def _inc(self, amount=1): + def _record(self, amount: float): tags = _format_tags(self.tag_names, self.tag_values) _datadog_record(statsd.increment, self.name, amount, tags) class Gauge(HqGauge): - def _set(self, value): + def _record(self, value): tags = _format_tags(self.tag_names, self.tag_values) _datadog_record(statsd.gauge, self.name, value, tags) +class Histogram(HqHistogram): + """This implementation of histogram uses tagging to record the 
buckets. + It does not use the Datadog Histogram metric type. + + The metric itself will be incremented by 1 on each call to `observe`. The value + passed to `observe` will be used to create the bucket tag. + + For example: + + h = Histogram( + 'commcare.request.duration', 'description', + bucket_tag='duration', buckets=[1,2,3], bucket_units='ms' + ) + h.observe(1.4) + + # resulting Datadog metric + # commcare.request.duration:1|c|#duration:lt_2ms + + More details: https://help.datadoghq.com/hc/en-us/articles/211545826 + """ + def _record(self, value: float): + tags = _format_tags(self.tag_names, self.tag_values) + bucket = bucket_value(value, self._buckets, self._bucket_unit) + tags.append(f'{self._bucket_tag}:{bucket}') + _datadog_record(statsd.increment, self.name, 1, tags) + + class DatadogMetrics(HqMetrics): _counter_class = Counter _gauge_class = Gauge + _histogram_class = Histogram def enabled(self) -> bool: return api._api_key and api._application_key diff --git a/corehq/util/metrics/metrics.py b/corehq/util/metrics/metrics.py index 9b52266bee92..73f7dad36e75 100644 --- a/corehq/util/metrics/metrics.py +++ b/corehq/util/metrics/metrics.py @@ -1,9 +1,10 @@ import abc import re from abc import abstractmethod -from typing import Iterable +from typing import Iterable, List from corehq.util.soft_assert import soft_assert +from prometheus_client.utils import INF METRIC_NAME_RE = re.compile(r'^[a-zA-Z_:.][a-zA-Z0-9_:.]*$') METRIC_TAG_NAME_RE = re.compile(r'^[a-zA-Z_][a-zA-Z0-9_]*$') @@ -30,14 +31,19 @@ def _validate_tag_names(tag_names): class MetricBase: - def __init__(self, name: str, documentation: str, tag_names: Iterable=tuple(), tag_values=None): + def __init__(self, name: str, documentation: str, tag_names: Iterable=tuple(), **kwargs): self.name = name if not METRIC_NAME_RE.match(name): raise ValueError('Invalid metric name: ' + name) _enforce_prefix(name, 'commcare') self.documentation = documentation self.tag_names = _validate_tag_names(tag_names) - self.tag_values = tag_values + self.tag_values = kwargs.pop('tag_values', None) + self._kwargs = kwargs + self._init_metric() + + def _init_metric(self): + pass def tag(self, **tag_kwargs): if sorted(tag_kwargs) != sorted(self.tag_names): @@ -47,38 +53,54 @@ def tag(self, **tag_kwargs): return self._get_tagged_instance(tag_values) def _get_tagged_instance(self, tag_values): - return self.__class__(self.name, self.documentation, self.tag_names, tag_values) + return self.__class__( + self.name, self.documentation, tag_names=self.tag_names, tag_values=tag_values, **self._kwargs + ) def _validate_tags(self): if self.tag_names and not self.tag_values: raise Exception('Metric has missing tag values.') + def _record(self, value: float): + pass + class HqCounter(MetricBase): def inc(self, amount: float = 1): """Increment the counter by the given amount.""" self._validate_tags() - self._inc(amount) - - def _inc(self, amount: float = 1): - """Override this method to record the metric""" - pass + self._record(amount) class HqGauge(MetricBase): def set(self, value: float): """Set gauge to the given value.""" self._validate_tags() - self._set(value) + self._record(value) - def _set(self, value: float): - """Override this method to record the metric""" - pass + +DEFAULT_BUCKETS = (.005, .01, .025, .05, .075, .1, .25, .5, .75, 1.0, 2.5, 5.0, 7.5, 10.0, INF) + + +class HqHistogram(MetricBase): + + def _init_metric(self): + self._buckets = self._kwargs.get('buckets') or DEFAULT_BUCKETS + self._bucket_unit = self._kwargs.get('bucket_unit', '') + 
self._bucket_tag = self._kwargs.get('bucket_tag') + if self._bucket_tag in self.tag_names: + self.tag_names = tuple([name for name in self.tag_names if name != self._bucket_tag]) + + def observe(self, value: float): + """Update histogram with the given value.""" + self._validate_tags() + self._record(value) class HqMetrics(metaclass=abc.ABCMeta): _counter_class = None _gauge_class = None + _histogram_class = None @abstractmethod def enabled(self) -> bool: @@ -90,6 +112,16 @@ def counter(self, name: str, documentation: str, tag_names: Iterable=tuple()) -> def gauge(self, name: str, documentation: str, tag_names: Iterable=tuple()) -> HqGauge: return self._gauge_class(name, documentation, tag_names) + def histogram(self, name: str, documentation: str, + bucket_tag: str, buckets: List[int] = DEFAULT_BUCKETS, bucket_unit: str = '', + tag_names: Iterable=tuple()) -> HqGauge: + """Create a histogram metric. Histogram implementations differ between provider. See provider + implementations for details. + """ + return self._histogram_class( + name, documentation, tag_names, bucket_tag=bucket_tag, buckets=buckets, bucket_unit=bucket_unit + ) + class DummyMetrics(HqMetrics): _counter_class = HqCounter diff --git a/corehq/util/metrics/prometheus.py b/corehq/util/metrics/prometheus.py index 6637f7097520..c98146f3886d 100644 --- a/corehq/util/metrics/prometheus.py +++ b/corehq/util/metrics/prometheus.py @@ -1,46 +1,69 @@ -from typing import Iterable - import settings from corehq.util.metrics.metrics import ( HqCounter, HqGauge, + HqHistogram, HqMetrics, MetricBase, ) from prometheus_client import Counter as PCounter from prometheus_client import Gauge as PGauge +from prometheus_client import Histogram as PHistogram class PrometheusMetricBase(MetricBase): _metric_class = None - def __init__(self, name: str, documentation: str, tag_names: Iterable=tuple(), tag_values=None, delegate=None): - name = name.replace('.', '_') - super().__init__(name, documentation, tag_names, tag_values) - self._delegate = delegate or self._metric_class(name, documentation, tag_names, labelvalues=tag_values) + def _init_metric(self): + super()._init_metric() + self.name = self.name.replace('.', '_') + delegate = self._kwargs.get('delegate') + self._delegate = delegate or self._metric_class(self.name, self.documentation, self.tag_names) def _get_tagged_instance(self, tag_values): - delegate = self._delegate.labels(dict(zip(self.tag_names, tag_values))) - return self.__class__(self.name, self.documentation, self.tag_names, self.tag_values, delegate) + delegate = self._delegate.labels(**dict(zip(self.tag_names, tag_values))) + return self.__class__( + self.name, self.documentation, + tag_names=self.tag_names, tag_values=tag_values, delegate=delegate, **self._kwargs + ) class Counter(PrometheusMetricBase, HqCounter): + """https://prometheus.io/docs/concepts/metric_types/#counter""" _metric_class = PCounter - def _inc(self, amount=1): + def _record(self, amount: float): self._delegate.inc(amount) class Gauge(PrometheusMetricBase, HqGauge): + """https://prometheus.io/docs/concepts/metric_types/#gauge""" _metric_class = PGauge - def _set(self, value: float): + def _record(self, value: float): self._delegate.set(value) +class Histogram(PrometheusMetricBase, HqHistogram): + """This metric class implements the native Prometheus Histogram type + + https://prometheus.io/docs/concepts/metric_types/#histogram + """ + def _init_metric(self): + HqHistogram._init_metric(self) # skip _init_metric on PrometheusMetricBase + self.name = 
self.name.replace('.', '_') + self._delegate = self._kwargs.get('delegate') + if not self._delegate: + self._delegate = PHistogram(self.name, self.documentation, self.tag_names, buckets=self._buckets) + + def _record(self, value: float): + self._delegate.observe(value) + + class PrometheusMetrics(HqMetrics): _counter_class = Counter _gauge_class = Gauge + _histogram_class = Histogram def enabled(self) -> bool: return settings.ENABLE_PROMETHEUS_METRICS diff --git a/corehq/util/metrics/tests/__init__.py b/corehq/util/metrics/tests/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/corehq/util/metrics/tests/test_metrics.py b/corehq/util/metrics/tests/test_metrics.py new file mode 100644 index 000000000000..caf4eb4c0be6 --- /dev/null +++ b/corehq/util/metrics/tests/test_metrics.py @@ -0,0 +1,171 @@ +from typing import Dict, Tuple + +from django.test import SimpleTestCase + +from corehq.util.metrics import DatadogMetrics, PrometheusMetrics +from corehq.util.metrics.metrics import HqCounter, HqGauge, HqHistogram +from corehq.util.metrics.tests.utils import patch_datadog +from prometheus_client.samples import Sample +from prometheus_client.utils import INF + + +class _TestMetrics(SimpleTestCase): + provider_class = None + + @classmethod + def setUpClass(cls): + super().setUpClass() + cls.provider = cls.provider_class() + + def test_counter(self): + counter = self.provider.counter('commcare.test.counter', 'Description', tag_names=['t1', 't2']) + counter.tag(t1='a', t2='b').inc() + counter.tag(t1='c', t2='b').inc(2) + counter.tag(t1='c', t2='b').inc() + self.assertCounterMetric(counter, { + (('t1', 'a'), ('t2', 'b')): 1, + (('t1', 'c'), ('t2', 'b')): 3, + }) + + def test_gauge(self): + gauge = self.provider.gauge('commcare.test.gauge', 'Description', tag_names=['t1', 't2']) + gauge.tag(t1='a', t2='b').set(4.2) + gauge.tag(t1='c', t2='b').set(2) + gauge.tag(t1='c', t2='b').set(5) + self.assertGaugeMetric(gauge, { + (('t1', 'a'), ('t2', 'b')): 4.2, + (('t1', 'c'), ('t2', 'b')): 5, + }) + + def test_histogram(self): + histogram = self.provider.histogram( + 'commcare.test.histogram', 'Description', 'duration', + buckets=[1, 2, 3], bucket_unit='ms', tag_names=['t1', 't2'] + ) + tagged_1 = histogram.tag(t1='a', t2='b') + tagged_1.observe(0.2) + tagged_1.observe(0.7) + tagged_1.observe(2.5) + + tagged_2 = histogram.tag(t1='c', t2='b') + tagged_2.observe(2) + tagged_2.observe(5) + self.assertHistogramMetric(histogram, { + (('t1', 'a'), ('t2', 'b')): {1: 2, 3: 1}, + (('t1', 'c'), ('t2', 'b')): {2: 1, INF: 1} + }) + + def assertCounterMetric(self, metric: HqCounter, expected: Dict[Tuple[Tuple[str, str], ...], float]): + """ + :param metric: metric class + :param expected: dict mapping tag tuples to metric values + """ + raise NotImplementedError + + def assertGaugeMetric(self, metric: HqGauge, expected: Dict[Tuple[Tuple[str, str], ...], float]): + """ + :param metric: metric class + :param expected: dict mapping tag tuples to metric values + """ + raise NotImplementedError + + def assertHistogramMetric(self, + metric: HqHistogram, + expected: Dict[Tuple[Tuple[str, str], ...], Dict[float, int]]): + """ + :param metric: metric class + :param expected: dict mapping tag tuples to a dict of bucket values + """ + raise NotImplementedError + + +class TestDatadogMetrics(_TestMetrics): + provider_class = DatadogMetrics + + def setUp(self) -> None: + super().setUp() + self.patch = patch_datadog() + self.metrics = self.patch.__enter__() + + def tearDown(self) -> None: + 
self.patch.__exit__(None, None, None) + super().tearDown() + + def assertCounterMetric(self, metric, expected): + self.assertEqual({key[0] for key in self.metrics}, {metric.name}) + actual = { + key[1]: sum(val) for key, val in self.metrics.items() + } + self.assertDictEqual(actual, expected) + + def assertGaugeMetric(self, metric, expected): + self.assertEqual({key[0] for key in self.metrics}, {metric.name}) + actual = { + key[1]: val[-1] for key, val in self.metrics.items() + } + self.assertDictEqual(actual, expected) + + def assertHistogramMetric(self, metric, expected): + self.assertEqual({key[0] for key in self.metrics}, {metric.name}) + expected_samples = {} + for tags, buckets in expected.items(): + for bucket, val in buckets.items(): + prefix = 'lt' + if bucket == INF: + bucket = metric._buckets[-1] + prefix = 'over' + bucket_tag = (metric._bucket_tag, f'{prefix}_{bucket:03d}{metric._bucket_unit}') + expected_samples[tags + (bucket_tag,)] = val + + actual = { + key[1]: sum(val) for key, val in self.metrics.items() + } + self.assertDictEqual(actual, expected_samples) + + +class TestPrometheusMetrics(_TestMetrics): + provider_class = PrometheusMetrics + + def _samples_to_dics(self, samples, filter_name=None): + """Convert a Sample tuple into a dict((name, (labels tuple)) -> value)""" + return { + tuple(sample.labels.items()): sample.value + for sample in samples + if not filter_name or sample.name == filter_name + } + + def assertGaugeMetric(self, metric, expected): + [collected] = metric._delegate.collect() + actual = self._samples_to_dics(collected.samples) + self.assertDictEqual(actual, expected) + + def assertCounterMetric(self, metric, expected): + total_name = f'{metric.name}_total' + [collected] = metric._delegate.collect() + actual = self._samples_to_dics(collected.samples, total_name) + self.assertDictEqual(actual, expected) + + def assertHistogramMetric(self, metric, expected): + # Note that Prometheus histograms are cumulative so we must sum up the successive bucket values + # https://en.wikipedia.org/wiki/Histogram#Cumulative_histogram + [collected] = metric._delegate.collect() + + sample_name = f'{metric.name}_bucket' + expected_samples = [] + for key, value in expected.items(): + cumulative_value = 0 + for bucket in metric._buckets: + val = value.get(bucket, 0) + cumulative_value += val + labels = dict(key + (('le', str(float(bucket))),)) + expected_samples.append(Sample(sample_name, labels, float(cumulative_value), None, None)) + + labels = dict(key + (('le', '+Inf'),)) + cumulative_value += value.get(INF, 0) + expected_samples.append(Sample(sample_name, labels, float(cumulative_value), None, None)) + + actual = [ + s for s in collected.samples + if s.name.endswith('bucket') + ] + self.assertListEqual(actual, expected_samples) diff --git a/corehq/util/metrics/tests/utils.py b/corehq/util/metrics/tests/utils.py new file mode 100644 index 000000000000..9c7f94b2d104 --- /dev/null +++ b/corehq/util/metrics/tests/utils.py @@ -0,0 +1,22 @@ +from collections import defaultdict +from contextlib import contextmanager + +import mock + + +@contextmanager +def patch_datadog(): + def record(fn, name, value, tags=None): + key = tuple([ + name, + tuple([tuple(t.split(':')) for t in (tags or [])]), + ]) + stats[key].append(value) + + stats = defaultdict(list) + patch = mock.patch("corehq.util.metrics.datadog._datadog_record", new=record) + patch.start() + try: + yield stats + finally: + patch.stop() From c1f8270e865523937f4390fb7ea55ba093ec185c Mon Sep 17 00:00:00 2001 From: Simon 
Kelly Date: Tue, 10 Mar 2020 17:41:11 +0200 Subject: [PATCH 04/40] bucket values less than or equal --- corehq/util/datadog/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/corehq/util/datadog/utils.py b/corehq/util/datadog/utils.py index 9d495679260f..c61a4d099cb1 100644 --- a/corehq/util/datadog/utils.py +++ b/corehq/util/datadog/utils.py @@ -115,7 +115,7 @@ def bucket_value(value, buckets, unit=''): lt_template = "lt_{:0%s}{}" % number_length over_template = "over_{:0%s}{}" % number_length for bucket in buckets: - if value < bucket: + if value <= bucket: return lt_template.format(bucket, unit) return over_template.format(buckets[-1], unit) From 5034156b76b1918a6b784f6cd8883380cc0bbec3 Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Tue, 10 Mar 2020 18:08:00 +0200 Subject: [PATCH 05/40] add prometheus client to requirements --- requirements-python3/dev-requirements.txt | 1 + requirements-python3/prod-requirements.txt | 1 + requirements-python3/requirements.txt | 1 + requirements-python3/test-requirements.txt | 1 + requirements/dev-requirements.txt | 1 + requirements/prod-requirements.txt | 1 + requirements/requirements.in | 1 + requirements/requirements.txt | 1 + requirements/test-requirements.txt | 1 + 9 files changed, 9 insertions(+) diff --git a/requirements-python3/dev-requirements.txt b/requirements-python3/dev-requirements.txt index d0741b0bb668..ddaa038ed973 100644 --- a/requirements-python3/dev-requirements.txt +++ b/requirements-python3/dev-requirements.txt @@ -130,6 +130,7 @@ pillow==6.2.1 pip-tools==4.4.0 ply==3.11 # via eulxml, jsonpath-rw polib==1.1.0 +prometheus-client==0.7.1 prompt-toolkit==1.0.18 # via ipython psutil==5.1.3 psycogreen==1.0.1 diff --git a/requirements-python3/prod-requirements.txt b/requirements-python3/prod-requirements.txt index 5627994d311c..128b82f8f986 100644 --- a/requirements-python3/prod-requirements.txt +++ b/requirements-python3/prod-requirements.txt @@ -109,6 +109,7 @@ pickleshare==0.7.5 # via ipython pillow==6.2.1 ply==3.11 # via eulxml, jsonpath-rw polib==1.1.0 +prometheus-client==0.7.1 prompt-toolkit==1.0.18 # via ipython psycogreen==1.0.1 psycopg2==2.7.7 diff --git a/requirements-python3/requirements.txt b/requirements-python3/requirements.txt index 207e181a974d..148161bb967e 100644 --- a/requirements-python3/requirements.txt +++ b/requirements-python3/requirements.txt @@ -101,6 +101,7 @@ phonenumberslite==8.10.22 pillow==6.2.1 ply==3.11 # via eulxml, jsonpath-rw polib==1.1.0 +prometheus-client==0.7.1 psycogreen==1.0.1 psycopg2==2.7.7 py-kissmetrics==1.0.1 diff --git a/requirements-python3/test-requirements.txt b/requirements-python3/test-requirements.txt index 12bf9a824042..6c5950ac74cf 100644 --- a/requirements-python3/test-requirements.txt +++ b/requirements-python3/test-requirements.txt @@ -113,6 +113,7 @@ pillow==6.2.1 pip-tools==4.4.0 ply==3.11 # via eulxml, jsonpath-rw polib==1.1.0 +prometheus-client==0.7.1 psycogreen==1.0.1 psycopg2==2.7.7 py-kissmetrics==1.0.1 diff --git a/requirements/dev-requirements.txt b/requirements/dev-requirements.txt index d0741b0bb668..ddaa038ed973 100644 --- a/requirements/dev-requirements.txt +++ b/requirements/dev-requirements.txt @@ -130,6 +130,7 @@ pillow==6.2.1 pip-tools==4.4.0 ply==3.11 # via eulxml, jsonpath-rw polib==1.1.0 +prometheus-client==0.7.1 prompt-toolkit==1.0.18 # via ipython psutil==5.1.3 psycogreen==1.0.1 diff --git a/requirements/prod-requirements.txt b/requirements/prod-requirements.txt index 5627994d311c..128b82f8f986 100644 --- 
a/requirements/prod-requirements.txt +++ b/requirements/prod-requirements.txt @@ -109,6 +109,7 @@ pickleshare==0.7.5 # via ipython pillow==6.2.1 ply==3.11 # via eulxml, jsonpath-rw polib==1.1.0 +prometheus-client==0.7.1 prompt-toolkit==1.0.18 # via ipython psycogreen==1.0.1 psycopg2==2.7.7 diff --git a/requirements/requirements.in b/requirements/requirements.in index c4754c951eea..25295c647c87 100644 --- a/requirements/requirements.in +++ b/requirements/requirements.in @@ -114,3 +114,4 @@ werkzeug==0.11.15 CommcareTranslationChecker==0.9.3.5 WeasyPrint==0.42.3 architect==0.5.6 +prometheus-client==0.7.1 diff --git a/requirements/requirements.txt b/requirements/requirements.txt index 207e181a974d..148161bb967e 100644 --- a/requirements/requirements.txt +++ b/requirements/requirements.txt @@ -101,6 +101,7 @@ phonenumberslite==8.10.22 pillow==6.2.1 ply==3.11 # via eulxml, jsonpath-rw polib==1.1.0 +prometheus-client==0.7.1 psycogreen==1.0.1 psycopg2==2.7.7 py-kissmetrics==1.0.1 diff --git a/requirements/test-requirements.txt b/requirements/test-requirements.txt index 12bf9a824042..6c5950ac74cf 100644 --- a/requirements/test-requirements.txt +++ b/requirements/test-requirements.txt @@ -113,6 +113,7 @@ pillow==6.2.1 pip-tools==4.4.0 ply==3.11 # via eulxml, jsonpath-rw polib==1.1.0 +prometheus-client==0.7.1 psycogreen==1.0.1 psycopg2==2.7.7 py-kissmetrics==1.0.1 From 2be4e6a27c6cab64d0ebc696c2f6341915e2e79e Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Wed, 11 Mar 2020 14:46:18 +0200 Subject: [PATCH 06/40] make metrics lazy --- corehq/util/metrics/__init__.py | 20 +++---- corehq/util/metrics/datadog.py | 2 + corehq/util/metrics/metrics.py | 63 +++++++++++------------ corehq/util/metrics/tests/test_metrics.py | 60 ++++++++++++++++++--- 4 files changed, 93 insertions(+), 52 deletions(-) diff --git a/corehq/util/metrics/__init__.py b/corehq/util/metrics/__init__.py index 7672abf5b545..6789a3130bfc 100644 --- a/corehq/util/metrics/__init__.py +++ b/corehq/util/metrics/__init__.py @@ -1,19 +1,15 @@ +from django.utils.functional import SimpleLazyObject + from corehq.util.metrics.datadog import DatadogMetrics from corehq.util.metrics.metrics import DummyMetrics, DelegatedMetrics, HqMetrics from corehq.util.metrics.prometheus import PrometheusMetrics -_metrics = None # singleton/global - -def get_metrics() -> HqMetrics: - global _metrics - if not _metrics: - enabled = list(filter(lambda m: m.enabled(), [DatadogMetrics(), PrometheusMetrics()])) - if not enabled: - _metrics = DummyMetrics() +def _get_metrics(): + enabled = list(filter(lambda m: m.enabled(), [DatadogMetrics(), PrometheusMetrics()])) + if not enabled: + return [DummyMetrics()] + return enabled - if len(enabled) > 1: - _metrics = DelegatedMetrics(enabled) - _metrics = enabled[0] - return _metrics +metrics = DelegatedMetrics(SimpleLazyObject(_get_metrics)) # singleton/global diff --git a/corehq/util/metrics/datadog.py b/corehq/util/metrics/datadog.py index 748718c3f23c..e13d5c3d9a1a 100644 --- a/corehq/util/metrics/datadog.py +++ b/corehq/util/metrics/datadog.py @@ -69,6 +69,8 @@ class Histogram(HqHistogram): """ def _record(self, value: float): tags = _format_tags(self.tag_names, self.tag_values) + if not tags: + tags = [] bucket = bucket_value(value, self._buckets, self._bucket_unit) tags.append(f'{self._bucket_tag}:{bucket}') _datadog_record(statsd.increment, self.name, 1, tags) diff --git a/corehq/util/metrics/metrics.py b/corehq/util/metrics/metrics.py index 73f7dad36e75..c4038abaf6ed 100644 --- a/corehq/util/metrics/metrics.py +++ 
b/corehq/util/metrics/metrics.py @@ -3,6 +3,8 @@ from abc import abstractmethod from typing import Iterable, List +from django.utils.functional import SimpleLazyObject + from corehq.util.soft_assert import soft_assert from prometheus_client.utils import INF @@ -31,7 +33,7 @@ def _validate_tag_names(tag_names): class MetricBase: - def __init__(self, name: str, documentation: str, tag_names: Iterable=tuple(), **kwargs): + def __init__(self, name: str, documentation: str, tag_names: Iterable = tuple(), **kwargs): self.name = name if not METRIC_NAME_RE.match(name): raise ValueError('Invalid metric name: ' + name) @@ -62,7 +64,7 @@ def _validate_tags(self): raise Exception('Metric has missing tag values.') def _record(self, value: float): - pass + raise NotImplementedError class HqCounter(MetricBase): @@ -114,7 +116,7 @@ def gauge(self, name: str, documentation: str, tag_names: Iterable=tuple()) -> H def histogram(self, name: str, documentation: str, bucket_tag: str, buckets: List[int] = DEFAULT_BUCKETS, bucket_unit: str = '', - tag_names: Iterable=tuple()) -> HqGauge: + tag_names: Iterable=tuple()) -> HqHistogram: """Create a histogram metric. Histogram implementations differ between provider. See provider implementations for details. """ @@ -131,41 +133,38 @@ def enabled(self) -> bool: return True -class DelegatedMetrics(HqMetrics): +class DelegatedMetrics: def __init__(self, delegates): self.delegates = delegates - - def enabled(self) -> bool: - return True - - def counter(self, name: str, documentation: str, tag_names: Iterable=tuple()): - return DelegatingCounter([ - d.counter(name, documentation, tag_names) for d in self.delegates - ]) - - def gauge(self, name: str, documentation: str, tag_names: Iterable=tuple()): - return DelegatingGauge([ - d.gauge(name, documentation, tag_names) for d in self.delegates - ]) + self._types = { + 'counter': 'inc', + 'gauge': 'set', + 'histogram': 'observe', + } + + def __getattr__(self, item): + if item in self._types: + def _make_type(*args, **kwargs): + return SimpleLazyObject(lambda: DelegatingMetric([ + getattr(d, item)(*args, **kwargs) for d in self.delegates + ], self._types[item])) + return _make_type + raise AttributeError class DelegatingMetric: - def __init__(self, delegates): + def __init__(self, delegates, record_fn_name): self._delegates = delegates + self._record_fn_name = record_fn_name - def tag(self, **tag_kwargs): - return self.__class__([ - d.tag(**tag_kwargs) for d in self._delegates - ]) + def tag(self, *args, **kwargs): + return self.__class__([d.tag(*args, **kwargs) for d in self._delegates]) + def __getattr__(self, item): + if item == self._record_fn_name: + def record(*args, **kwargs): + for metric in self._delegates: + getattr(metric, item)(*args, **kwargs) + return record -class DelegatingCounter(DelegatingMetric): - def inc(self, amount: float = 1): - for d in self._delegates: - d.inc(amount) - - -class DelegatingGauge(DelegatingMetric): - def set(self, value: float): - for d in self._delegates: - d.set(value) + raise AttributeError diff --git a/corehq/util/metrics/tests/test_metrics.py b/corehq/util/metrics/tests/test_metrics.py index caf4eb4c0be6..c6d721921780 100644 --- a/corehq/util/metrics/tests/test_metrics.py +++ b/corehq/util/metrics/tests/test_metrics.py @@ -1,12 +1,19 @@ from typing import Dict, Tuple from django.test import SimpleTestCase +from django.utils.functional import SimpleLazyObject from corehq.util.metrics import DatadogMetrics, PrometheusMetrics -from corehq.util.metrics.metrics import HqCounter, HqGauge, 
HqHistogram +from corehq.util.metrics.metrics import ( + DelegatedMetrics, + HqCounter, + HqGauge, + HqHistogram, +) from corehq.util.metrics.tests.utils import patch_datadog from prometheus_client.samples import Sample from prometheus_client.utils import INF +from testil import eq class _TestMetrics(SimpleTestCase): @@ -85,28 +92,28 @@ class TestDatadogMetrics(_TestMetrics): def setUp(self) -> None: super().setUp() self.patch = patch_datadog() - self.metrics = self.patch.__enter__() + self.recorded_metrics = self.patch.__enter__() def tearDown(self) -> None: self.patch.__exit__(None, None, None) super().tearDown() def assertCounterMetric(self, metric, expected): - self.assertEqual({key[0] for key in self.metrics}, {metric.name}) + self.assertEqual({key[0] for key in self.recorded_metrics}, {metric.name}) actual = { - key[1]: sum(val) for key, val in self.metrics.items() + key[1]: sum(val) for key, val in self.recorded_metrics.items() } self.assertDictEqual(actual, expected) def assertGaugeMetric(self, metric, expected): - self.assertEqual({key[0] for key in self.metrics}, {metric.name}) + self.assertEqual({key[0] for key in self.recorded_metrics}, {metric.name}) actual = { - key[1]: val[-1] for key, val in self.metrics.items() + key[1]: val[-1] for key, val in self.recorded_metrics.items() } self.assertDictEqual(actual, expected) def assertHistogramMetric(self, metric, expected): - self.assertEqual({key[0] for key in self.metrics}, {metric.name}) + self.assertEqual({key[0] for key in self.recorded_metrics}, {metric.name}) expected_samples = {} for tags, buckets in expected.items(): for bucket, val in buckets.items(): @@ -118,7 +125,7 @@ def assertHistogramMetric(self, metric, expected): bucket = metric._buckets[-1] prefix = 'over' bucket_tag = (metric._bucket_tag, f'{prefix}_{bucket:03d}{metric._bucket_unit}') - expected_samples[tags + (bucket_tag,)] = val + expected_samples[tuple(sorted(tags + (bucket_tag,)))] = val actual = { - key[1]: sum(val) for key, val in self.metrics.items() + key[1]: sum(val) for key, val in self.recorded_metrics.items() } self.assertDictEqual(actual, expected_samples) @@ -169,3 +176,40 @@ def assertHistogramMetric(self, metric, expected): if s.name.endswith('bucket') ] self.assertListEqual(actual, expected_samples) + + +def test_delegate_lazy(): + metrics = DelegatedMetrics([DatadogMetrics(), PrometheusMetrics()]) + + def _check(metric): + assert isinstance(metric, SimpleLazyObject), '' + + test_cases = [ + metrics.counter('commcare.name.1', ''), + metrics.gauge('commcare.name.2', ''), + metrics.histogram('commcare.name.3', '', 'duration'), + ] + for metric in test_cases: + yield _check, metric + + +def test_lazy_recording(): + metrics = DelegatedMetrics([DatadogMetrics(), PrometheusMetrics()]) + + def _check(metric, method_name): + with patch_datadog() as stats: + getattr(metric, method_name)(1) + + dd_metric, prom_metric = metric._delegates + [collected] = prom_metric._delegate.collect() + + eq(len(stats), 1, stats) + eq(len(collected.samples) >= 1, True, collected.samples) + + test_cases = [ + (metrics.counter('commcare.name.1', ''), 'inc'), + (metrics.gauge('commcare.name.2', ''), 'set'), + (metrics.histogram('commcare.name.3', '', 'duration'), 'observe'), + ] + for metric, method_name in test_cases: + yield _check, metric, method_name
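The module-level metrics object introduced in this patch is wrapped in SimpleLazyObject, so the enabled backends are only resolved the first time a metric is actually used. A minimal usage sketch, assuming a CommCare HQ environment with Django settings configured and at least one backend (Datadog or Prometheus) enabled; the metric name here is hypothetical:

    # Hypothetical usage of the lazy metrics singleton; not part of the patch series.
    from corehq.util.metrics import metrics

    # Nothing is instantiated yet: DelegatedMetrics.__getattr__ hands back a factory,
    # and the factory wraps the real DelegatingMetric in a SimpleLazyObject.
    example_counter = metrics.counter(
        'commcare.example.errors', 'Hypothetical counter used for illustration'
    )

    # The underlying Datadog/Prometheus counters are only created here, on first access.
    example_counter.inc()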
From eb8afe3fdc83b34a41d2531fc47dcd00952c9f6e Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Wed, 11 Mar 2020 16:45:31 +0200 Subject: [PATCH 07/40] example histogram --- corehq/apps/api/odata/utils.py | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/corehq/apps/api/odata/utils.py index f09608e12274..6992808fa2e4 100644 --- a/corehq/apps/api/odata/utils.py +++ b/corehq/apps/api/odata/utils.py @@ -4,6 +4,7 @@ from corehq.apps.export.models import ExportInstance from corehq.util.datadog.gauges import datadog_counter from corehq.util.datadog.utils import bucket_value +from corehq.util.metrics import metrics FieldMetadata = namedtuple('FieldMetadata', ['name', 'odata_type']) @@ -54,6 +55,13 @@ def _get_dot_path(export_column): return metadata +odata_feed_access_histogram = metrics.histogram( + 'commcare.odata_feed.test_v3', 'Odata Feed Access', + bucket_tag='duration_bucket', buckets=(1, 5, 20, 60, 120, 300, 600), bucket_unit='s', + tag_names=['domain', 'feed_id', 'feed_type', 'username', 'row_count', 'column_count', 'size'] +) + + def record_feed_access_in_datadog(request, config_id, duration, response): config = ExportInstance.get(config_id) username = request.couch_user.username @@ -64,14 +72,12 @@ def record_feed_access_in_datadog(request, config_id, duration, response): column_count = len(rows[0]) except IndexError: column_count = 0 - datadog_counter('commcare.odata_feed.test_v3', tags=[ - 'domain:{}'.format(request.domain), - 'feed_id:{}'.format(config_id), - 'feed_type:{}'.format(config.type), - 'username:{}'.format(username), - 'row_count:{}'.format(row_count), - 'column_count:{}'.format(column_count), - 'size:{}'.format(len(response.content)), - 'duration:{}'.format(duration), - 'duration_bucket:{}'.format(bucket_value(duration, (1, 5, 20, 60, 120, 300, 600), 's')), - ]) + odata_feed_access_histogram.tag( + domain=request.domain, + feed_id=config_id, + feed_type=config.type, + username=username, + row_count=row_count, + column_count=column_count, + size=len(response.content), + ).observe(duration) From 54f2d1bef3026d87557e78fa4b8b5c01ade70951 Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Wed, 11 Mar 2020 17:09:47 +0200 Subject: [PATCH 08/40] convert submission metrics --- corehq/apps/receiverwrapper/views.py | 69 +++++++++++++++++----------- corehq/util/datadog/metrics.py | 2 - 2 files changed, 43 insertions(+), 28 deletions(-) diff --git a/corehq/apps/receiverwrapper/views.py b/corehq/apps/receiverwrapper/views.py index 10922e0b4e45..6f27ed881bfe 100644 --- a/corehq/apps/receiverwrapper/views.py +++ b/corehq/apps/receiverwrapper/views.py @@ -10,6 +10,7 @@ import couchforms from casexml.apps.case.xform import get_case_updates, is_device_report +from corehq.util.metrics import metrics from couchforms import openrosa_response from couchforms.const import MAGIC_PROPERTY, BadRequest from couchforms.getters import MultimediaBug @@ -50,18 +51,38 @@ convert_xform_to_json, should_use_sql_backend, ) -from corehq.util.datadog.gauges import datadog_counter, datadog_gauge -from corehq.util.datadog.metrics import ( - MULTIMEDIA_SUBMISSION_ERROR_COUNT, - XFORM_LOCKED_COUNT, -) -from corehq.util.datadog.utils import bucket_value from corehq.util.timer import TimingContext PROFILE_PROBABILITY = float(os.getenv('COMMCARE_PROFILE_SUBMISSION_PROBABILITY', 0)) PROFILE_LIMIT = os.getenv('COMMCARE_PROFILE_SUBMISSION_LIMIT') PROFILE_LIMIT = int(PROFILE_LIMIT) if PROFILE_LIMIT is not None else 1 +corrupt_multimedia_counter = metrics.counter( + 'commcare.corrupt_multimedia_submissions', 'Count of requests with corrupt multimedia', + tag_names=['domain', 'authenticated'] +) + +xform_locked_error_counter = metrics.counter( + 'commcare.xformlocked.count', 'Count of locking errors', + tag_names=['domain', 'authenticated'] +) + +submission_counter = metrics.counter( + 'commcare.xform_submissions.count', 'Count of form 
submissions', + tag_names=['domain', 'backend', 'submission_type', 'status_code'] +) + +submission_duration_histogram = metrics.histogram( + 'commcare.xform_submissions.duration.seconds', 'Count of form submissions', + bucket_tag='duration', buckets=(1, 5, 20, 60, 120, 300, 600), bucket_unit='s', + tag_names=['domain', 'backend', 'submission_type', 'status_code'] +) + +submission_lag_histogram = metrics.histogram( + 'commcare.xform_submissions.lag.days', 'Count of form submissions', + bucket_tag='lag', buckets=(1, 2, 4, 7, 14, 31, 90), bucket_unit='d', + tag_names=['domain', 'backend', 'submission_type', 'status_code'] +) @profile_prod('commcare_receiverwapper_process_form.prof', probability=PROFILE_PROBABILITY, limit=PROFILE_LIMIT) def _process_form(request, domain, app_id, user_id, authenticated, @@ -70,10 +91,10 @@ def _process_form(request, domain, app_id, user_id, authenticated, if rate_limit_submission(domain): return HttpTooManyRequests() - metric_tags = [ - 'backend:sql' if should_use_sql_backend(domain) else 'backend:couch', - 'domain:{}'.format(domain), - ] + metric_tags = { + 'backend': 'sql' if should_use_sql_backend(domain) else 'couch', + 'domain': domain + } try: instance, attachments = couchforms.get_instance_and_attachment(request) @@ -85,9 +106,9 @@ def _process_form(request, domain, app_id, user_id, authenticated, except: meta = {} + corrupt_multimedia_counter.tag(domain=domain, authenticated=authenticated).inc() return _submission_error( - request, "Received a submission with POST.keys()", - MULTIMEDIA_SUBMISSION_ERROR_COUNT, metric_tags, + request, "Received a submission with POST.keys()", metric_tags, domain, app_id, user_id, authenticated, meta, ) @@ -133,8 +154,9 @@ def _process_form(request, domain, app_id, user_id, authenticated, try: result = submission_post.run() except XFormLockError as err: + xform_locked_error_counter.tag(domain=domain, authenticated=authenticated).inc() return _submission_error( - request, "XFormLockError: %s" % err, XFORM_LOCKED_COUNT, + request, "XFormLockError: %s" % err, metric_tags, domain, app_id, user_id, authenticated, status=423, notify=False, ) @@ -145,7 +167,7 @@ def _process_form(request, domain, app_id, user_id, authenticated, return response -def _submission_error(request, message, count_metric, metric_tags, +def _submission_error(request, message, metric_tags, domain, app_id, user_id, authenticated, meta=None, status=400, notify=True): """Notify exception, datadog count, record metrics, construct response @@ -157,7 +179,6 @@ def _submission_error(request, message, count_metric, metric_tags, "domain:{}".format(domain), "authenticated:{}".format(authenticated), ] - datadog_counter(count_metric, tags=details) if notify: details.extend([ "user_id:{}".format(user_id), @@ -172,24 +193,20 @@ def _submission_error(request, message, count_metric, metric_tags, def _record_metrics(tags, submission_type, response, timer=None, xform=None): - if xform and xform.metadata and xform.metadata.timeEnd and xform.received_on: - lag = xform.received_on - xform.metadata.timeEnd - lag_days = lag.total_seconds() / 86400 - tags += [ - 'lag:%s' % bucket_value(lag_days, (1, 2, 4, 7, 14, 31, 90), 'd') - ] - tags += [ 'submission_type:{}'.format(submission_type), 'status_code:{}'.format(response.status_code) ] + if xform and xform.metadata and xform.metadata.timeEnd and xform.received_on: + lag = xform.received_on - xform.metadata.timeEnd + lag_days = lag.total_seconds() / 86400 + submission_lag_histogram.tag(**tags).observe(lag_days) + if timer: - tags += 
[ - 'duration:%s' % bucket_value(timer.duration, (1, 5, 20, 60, 120, 300, 600), 's'), - ] + submission_duration_histogram.tag(**tags).observe(timer.duration) - datadog_counter('commcare.xform_submissions.count', tags=tags) + submission_counter.tag(**tags).inc() @location_safe diff --git a/corehq/util/datadog/metrics.py b/corehq/util/datadog/metrics.py index 545be07a5a85..d56dea5154db 100644 --- a/corehq/util/datadog/metrics.py +++ b/corehq/util/datadog/metrics.py @@ -2,6 +2,4 @@ ERROR_COUNT = 'commcare.error.count' REPEATER_ERROR_COUNT = 'commcare.repeaters.error' REPEATER_SUCCESS_COUNT = 'commcare.repeaters.success' -MULTIMEDIA_SUBMISSION_ERROR_COUNT = 'commcare.corrupt-multimedia-submission.error.count' DATE_OPENED_CASEBLOCK_ERROR_COUNT = 'commcare.date-opened-caseblock-bug.error.count' -XFORM_LOCKED_COUNT = 'commcare.xformlocked.count' From eba69c4f8937352143272b8d27eda4064bba91cd Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Wed, 11 Mar 2020 17:29:04 +0200 Subject: [PATCH 09/40] docstrings --- corehq/util/metrics/metrics.py | 2 ++ corehq/util/metrics/prometheus.py | 1 + 2 files changed, 3 insertions(+) diff --git a/corehq/util/metrics/metrics.py b/corehq/util/metrics/metrics.py index c4038abaf6ed..65ea9def39b1 100644 --- a/corehq/util/metrics/metrics.py +++ b/corehq/util/metrics/metrics.py @@ -134,6 +134,8 @@ def enabled(self) -> bool: class DelegatedMetrics: + """This class makes the metric class instantiation lazy and + also multiple metrics providers to be used.""" def __init__(self, delegates): self.delegates = delegates self._types = { diff --git a/corehq/util/metrics/prometheus.py b/corehq/util/metrics/prometheus.py index c98146f3886d..30d4604d0fee 100644 --- a/corehq/util/metrics/prometheus.py +++ b/corehq/util/metrics/prometheus.py @@ -50,6 +50,7 @@ class Histogram(PrometheusMetricBase, HqHistogram): https://prometheus.io/docs/concepts/metric_types/#histogram """ def _init_metric(self): + """Overriding this so that we can pass in the buckets to the Prometheus class""" HqHistogram._init_metric(self) # skip _init_metric on PrometheusMetricBase self.name = self.name.replace('.', '_') self._delegate = self._kwargs.get('delegate') From 01fbc076c50d8a67299340e290d2c5039102ea5a Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Wed, 11 Mar 2020 17:30:30 +0200 Subject: [PATCH 10/40] stickler --- corehq/util/metrics/__init__.py | 2 +- corehq/util/metrics/metrics.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/corehq/util/metrics/__init__.py b/corehq/util/metrics/__init__.py index 6789a3130bfc..1e6a6053ea49 100644 --- a/corehq/util/metrics/__init__.py +++ b/corehq/util/metrics/__init__.py @@ -1,7 +1,7 @@ from django.utils.functional import SimpleLazyObject from corehq.util.metrics.datadog import DatadogMetrics -from corehq.util.metrics.metrics import DummyMetrics, DelegatedMetrics, HqMetrics +from corehq.util.metrics.metrics import DummyMetrics, DelegatedMetrics from corehq.util.metrics.prometheus import PrometheusMetrics diff --git a/corehq/util/metrics/metrics.py b/corehq/util/metrics/metrics.py index 65ea9def39b1..006971c30e9e 100644 --- a/corehq/util/metrics/metrics.py +++ b/corehq/util/metrics/metrics.py @@ -108,15 +108,15 @@ class HqMetrics(metaclass=abc.ABCMeta): def enabled(self) -> bool: raise NotImplementedError - def counter(self, name: str, documentation: str, tag_names: Iterable=tuple()) -> HqCounter: + def counter(self, name: str, documentation: str, tag_names: Iterable = tuple()) -> HqCounter: return self._counter_class(name, documentation, 
tag_names) - def gauge(self, name: str, documentation: str, tag_names: Iterable=tuple()) -> HqGauge: + def gauge(self, name: str, documentation: str, tag_names: Iterable = tuple()) -> HqGauge: return self._gauge_class(name, documentation, tag_names) def histogram(self, name: str, documentation: str, bucket_tag: str, buckets: List[int] = DEFAULT_BUCKETS, bucket_unit: str = '', - tag_names: Iterable=tuple()) -> HqHistogram: + tag_names: Iterable = tuple()) -> HqHistogram: """Create a histogram metric. Histogram implementations differ between provider. See provider implementations for details. """ From 450f211179d41bc243c02470b269c0bdfdd3203f Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Thu, 12 Mar 2020 11:17:25 +0200 Subject: [PATCH 11/40] keep tag_values as dict instead of splitting and re-combining --- corehq/util/metrics/datadog.py | 12 ++++++------ corehq/util/metrics/metrics.py | 15 ++++++++------- corehq/util/metrics/prometheus.py | 4 ++-- corehq/util/metrics/tests/test_metrics.py | 2 +- corehq/util/metrics/tests/utils.py | 8 ++------ 5 files changed, 19 insertions(+), 22 deletions(-) diff --git a/corehq/util/metrics/datadog.py b/corehq/util/metrics/datadog.py index e13d5c3d9a1a..574cf8de58bf 100644 --- a/corehq/util/metrics/datadog.py +++ b/corehq/util/metrics/datadog.py @@ -19,12 +19,12 @@ statsd = DogStatsd(constant_tags=COMMON_TAGS) -def _format_tags(tag_names, tag_values): - if not tag_names or not tag_values: +def _format_tags(tag_values: dict): + if not tag_values: return None return [ - f'{name}:{value}' for name, value in zip(tag_names, tag_values) + f'{name}:{value}' for name, value in tag_values.items() ] @@ -37,13 +37,13 @@ def _datadog_record(fn, name, value, tags=None): class Counter(HqCounter): def _record(self, amount: float): - tags = _format_tags(self.tag_names, self.tag_values) + tags = _format_tags(self.tag_values) _datadog_record(statsd.increment, self.name, amount, tags) class Gauge(HqGauge): def _record(self, value): - tags = _format_tags(self.tag_names, self.tag_values) + tags = _format_tags(self.tag_values) _datadog_record(statsd.gauge, self.name, value, tags) @@ -68,7 +68,7 @@ class Histogram(HqHistogram): More details: https://help.datadoghq.com/hc/en-us/articles/211545826 """ def _record(self, value: float): - tags = _format_tags(self.tag_names, self.tag_values) + tags = _format_tags(self.tag_values) if not tags: tags = [] bucket = bucket_value(value, self._buckets, self._bucket_unit) diff --git a/corehq/util/metrics/metrics.py b/corehq/util/metrics/metrics.py index 006971c30e9e..d1bf13680b7f 100644 --- a/corehq/util/metrics/metrics.py +++ b/corehq/util/metrics/metrics.py @@ -21,7 +21,7 @@ def _enforce_prefix(name, prefix): def _validate_tag_names(tag_names): - tag_names = tuple(tag_names) + tag_names = set(tag_names) for l in tag_names: if not METRIC_TAG_NAME_RE.match(l): raise ValueError('Invalid metric tag name: ' + l) @@ -41,6 +41,10 @@ def __init__(self, name: str, documentation: str, tag_names: Iterable = tuple(), self.documentation = documentation self.tag_names = _validate_tag_names(tag_names) self.tag_values = kwargs.pop('tag_values', None) + if self.tag_values: + assert isinstance(self.tag_values, dict) + if self.tag_values.keys() != self.tag_names: + raise ValueError('Incorrect tag names') self._kwargs = kwargs self._init_metric() @@ -48,13 +52,10 @@ def _init_metric(self): pass def tag(self, **tag_kwargs): - if sorted(tag_kwargs) != sorted(self.tag_names): - raise ValueError('Incorrect tag names') + tag_kwargs = {t: str(v) for t, v in 
tag_kwargs.items()} + return self._get_tagged_instance(tag_kwargs) - tag_values = tuple(str(tag_kwargs[t]) for t in self.tag_names) - return self._get_tagged_instance(tag_values) - - def _get_tagged_instance(self, tag_values): + def _get_tagged_instance(self, tag_values: dict): return self.__class__( self.name, self.documentation, tag_names=self.tag_names, tag_values=tag_values, **self._kwargs ) diff --git a/corehq/util/metrics/prometheus.py b/corehq/util/metrics/prometheus.py index 30d4604d0fee..604b02ae7aaf 100644 --- a/corehq/util/metrics/prometheus.py +++ b/corehq/util/metrics/prometheus.py @@ -20,8 +20,8 @@ def _init_metric(self): delegate = self._kwargs.get('delegate') self._delegate = delegate or self._metric_class(self.name, self.documentation, self.tag_names) - def _get_tagged_instance(self, tag_values): - delegate = self._delegate.labels(**dict(zip(self.tag_names, tag_values))) + def _get_tagged_instance(self, tag_values: dict): + delegate = self._delegate.labels(**tag_values) return self.__class__( self.name, self.documentation, tag_names=self.tag_names, tag_values=tag_values, delegate=delegate, **self._kwargs diff --git a/corehq/util/metrics/tests/test_metrics.py b/corehq/util/metrics/tests/test_metrics.py index c6d721921780..f77d7e929e9f 100644 --- a/corehq/util/metrics/tests/test_metrics.py +++ b/corehq/util/metrics/tests/test_metrics.py @@ -122,7 +122,7 @@ def assertHistogramMetric(self, metric, expected): bucket = metric._buckets[-1] prefix = 'over' bucket_tag = (metric._bucket_tag, f'{prefix}_{bucket:03d}{metric._bucket_unit}') - expected_samples[tags + (bucket_tag,)] = val + expected_samples[tuple(sorted(tags + (bucket_tag,)))] = val actual = { key[1]: sum(val) for key, val in self.recorded_metrics.items() diff --git a/corehq/util/metrics/tests/utils.py b/corehq/util/metrics/tests/utils.py index 9c7f94b2d104..bbd9119e3113 100644 --- a/corehq/util/metrics/tests/utils.py +++ b/corehq/util/metrics/tests/utils.py @@ -9,14 +9,10 @@ def patch_datadog(): def record(fn, name, value, tags=None): key = tuple([ name, - tuple([tuple(t.split(':')) for t in (tags or [])]), + tuple(sorted([tuple(t.split(':')) for t in (tags or [])])), ]) stats[key].append(value) stats = defaultdict(list) - patch = mock.patch("corehq.util.metrics.datadog._datadog_record", new=record) - patch.start() - try: + with mock.patch("corehq.util.metrics.datadog._datadog_record", new=record): yield stats - finally: - patch.stop() From 2c3edefb732f894c64c891cd960bc42252f5a30f Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Thu, 12 Mar 2020 13:55:57 +0200 Subject: [PATCH 12/40] update links --- corehq/util/metrics/datadog.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/corehq/util/metrics/datadog.py b/corehq/util/metrics/datadog.py index 574cf8de58bf..e463cccfcd53 100644 --- a/corehq/util/metrics/datadog.py +++ b/corehq/util/metrics/datadog.py @@ -65,7 +65,9 @@ class Histogram(HqHistogram): # resulting Datadog metric # commcare.request.duration:1|c|#duration:lt_2ms - More details: https://help.datadoghq.com/hc/en-us/articles/211545826 + For more details see: + * https://github.com/dimagi/commcare-hq/pull/17080 + * https://github.com/dimagi/commcare-hq/pull/17030#issuecomment-315794700 """ def _record(self, value: float): tags = _format_tags(self.tag_values) From 8664161faf69621076752cfb29fb1e07441441c2 Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Thu, 12 Mar 2020 13:56:59 +0200 Subject: [PATCH 13/40] remove unnecessary list Co-Authored-By: Daniel Miller --- corehq/util/metrics/metrics.py 
| 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/corehq/util/metrics/metrics.py b/corehq/util/metrics/metrics.py index d1bf13680b7f..6520d6f5b6cd 100644 --- a/corehq/util/metrics/metrics.py +++ b/corehq/util/metrics/metrics.py @@ -92,7 +92,7 @@ def _init_metric(self): self._bucket_unit = self._kwargs.get('bucket_unit', '') self._bucket_tag = self._kwargs.get('bucket_tag') if self._bucket_tag in self.tag_names: - self.tag_names = tuple([name for name in self.tag_names if name != self._bucket_tag]) + self.tag_names = tuple(name for name in self.tag_names if name != self._bucket_tag) def observe(self, value: float): """Update histogram with the given value.""" From 0bcedea6a39896985f37058701f8344367523904 Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Thu, 12 Mar 2020 13:58:34 +0200 Subject: [PATCH 14/40] replace tuple() with () --- corehq/util/metrics/metrics.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/corehq/util/metrics/metrics.py b/corehq/util/metrics/metrics.py index d1bf13680b7f..cef24f31cb87 100644 --- a/corehq/util/metrics/metrics.py +++ b/corehq/util/metrics/metrics.py @@ -33,7 +33,7 @@ def _validate_tag_names(tag_names): class MetricBase: - def __init__(self, name: str, documentation: str, tag_names: Iterable = tuple(), **kwargs): + def __init__(self, name: str, documentation: str, tag_names: Iterable = (), **kwargs): self.name = name if not METRIC_NAME_RE.match(name): raise ValueError('Invalid metric name: ' + name) @@ -109,15 +109,15 @@ class HqMetrics(metaclass=abc.ABCMeta): def enabled(self) -> bool: raise NotImplementedError - def counter(self, name: str, documentation: str, tag_names: Iterable = tuple()) -> HqCounter: + def counter(self, name: str, documentation: str, tag_names: Iterable = ()) -> HqCounter: return self._counter_class(name, documentation, tag_names) - def gauge(self, name: str, documentation: str, tag_names: Iterable = tuple()) -> HqGauge: + def gauge(self, name: str, documentation: str, tag_names: Iterable = ()) -> HqGauge: return self._gauge_class(name, documentation, tag_names) def histogram(self, name: str, documentation: str, bucket_tag: str, buckets: List[int] = DEFAULT_BUCKETS, bucket_unit: str = '', - tag_names: Iterable = tuple()) -> HqHistogram: + tag_names: Iterable = ()) -> HqHistogram: """Create a histogram metric. Histogram implementations differ between provider. See provider implementations for details.
""" From 904e358edeebc5078507700554157cdc933276d4 Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Thu, 12 Mar 2020 15:34:36 +0200 Subject: [PATCH 15/40] fix tags --- corehq/apps/receiverwrapper/views.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/corehq/apps/receiverwrapper/views.py b/corehq/apps/receiverwrapper/views.py index 6f27ed881bfe..22e53df3c76d 100644 --- a/corehq/apps/receiverwrapper/views.py +++ b/corehq/apps/receiverwrapper/views.py @@ -193,10 +193,10 @@ def _submission_error(request, message, metric_tags, def _record_metrics(tags, submission_type, response, timer=None, xform=None): - tags += [ - 'submission_type:{}'.format(submission_type), - 'status_code:{}'.format(response.status_code) - ] + tags.update({ + 'submission_type': submission_type, + 'status_code': response.status_code + }) if xform and xform.metadata and xform.metadata.timeEnd and xform.received_on: lag = xform.received_on - xform.metadata.timeEnd From d68e104a2df754e02b3b3154382f34489e03c5de Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Thu, 12 Mar 2020 15:34:42 +0200 Subject: [PATCH 16/40] pass other args --- corehq/util/metrics/metrics.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/corehq/util/metrics/metrics.py b/corehq/util/metrics/metrics.py index 4fdf53446923..3d37368e4a40 100644 --- a/corehq/util/metrics/metrics.py +++ b/corehq/util/metrics/metrics.py @@ -161,7 +161,7 @@ def __init__(self, delegates, record_fn_name): self._record_fn_name = record_fn_name def tag(self, *args, **kwargs): - return self.__class__([d.tag(*args, **kwargs) for d in self._delegates]) + return self.__class__([d.tag(*args, **kwargs) for d in self._delegates], self._record_fn_name) def __getattr__(self, item): if item == self._record_fn_name: From 5c000528e4c114005cb25faa7e3fbc288722a8c7 Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Thu, 12 Mar 2020 16:53:42 +0200 Subject: [PATCH 17/40] revert change to datadog bucketing boundry --- corehq/util/datadog/utils.py | 2 +- corehq/util/metrics/tests/test_metrics.py | 63 +++++++++++++---------- 2 files changed, 37 insertions(+), 28 deletions(-) diff --git a/corehq/util/datadog/utils.py b/corehq/util/datadog/utils.py index c61a4d099cb1..9d495679260f 100644 --- a/corehq/util/datadog/utils.py +++ b/corehq/util/datadog/utils.py @@ -115,7 +115,7 @@ def bucket_value(value, buckets, unit=''): lt_template = "lt_{:0%s}{}" % number_length over_template = "over_{:0%s}{}" % number_length for bucket in buckets: - if value <= bucket: + if value < bucket: return lt_template.format(bucket, unit) return over_template.format(buckets[-1], unit) diff --git a/corehq/util/metrics/tests/test_metrics.py b/corehq/util/metrics/tests/test_metrics.py index f77d7e929e9f..ea6b878f6e8b 100644 --- a/corehq/util/metrics/tests/test_metrics.py +++ b/corehq/util/metrics/tests/test_metrics.py @@ -44,24 +44,6 @@ def test_gauge(self): (('t1', 'c'), ('t2', 'b')): 5, }) - def test_histogram(self): - histogram = self.provider.histogram( - 'commcare.test.histogram', 'Description', 'duration', - buckets=[1, 2, 3], bucket_unit='ms', tag_names=['t1', 't2'] - ) - tagged_1 = histogram.tag(t1='a', t2='b') - tagged_1.observe(0.2) - tagged_1.observe(0.7) - tagged_1.observe(2.5) - - tagged_2 = histogram.tag(t1='c', t2='b') - tagged_2.observe(2) - tagged_2.observe(5) - self.assertHistogramMetric(histogram, { - (('t1', 'a'), ('t2', 'b')): {1: 2, 3: 1}, - (('t1', 'c'), ('t2', 'b')): {2: 1, INF: 1} - }) - def assertCounterMetric(self, metric: HqCounter, expected: Dict[Tuple[Tuple[str, 
str], ...], float]): """ :param metric: metric class @@ -76,15 +58,6 @@ def assertGaugeMetric(self, metric: HqGauge, expected: Dict[Tuple[Tuple[str, str """ raise NotImplementedError - def assertHistogramMetric(self, - metric: HqHistogram, - expected: Dict[Tuple[Tuple[str, str], ...], Dict[float, int]]): - """ - :param metric: metric class - :param expected: dict mapping tag tuples to a dict of bucket values - """ - raise NotImplementedError - class TestDatadogMetrics(_TestMetrics): provider_class = DatadogMetrics @@ -98,6 +71,24 @@ def tearDown(self) -> None: self.patch.__exit__(None, None, None) super().tearDown() + def test_histogram(self): + histogram = self.provider.histogram( + 'commcare.test.histogram', 'Description', 'duration', + buckets=[1, 2, 3], bucket_unit='ms', tag_names=['t1', 't2'] + ) + tagged_1 = histogram.tag(t1='a', t2='b') + tagged_1.observe(0.2) + tagged_1.observe(0.7) + tagged_1.observe(2.5) + + tagged_2 = histogram.tag(t1='c', t2='b') + tagged_2.observe(2) + tagged_2.observe(5) + self.assertHistogramMetric(histogram, { + (('t1', 'a'), ('t2', 'b')): {1: 2, 3: 1}, + (('t1', 'c'), ('t2', 'b')): {3: 1, INF: 1} + }) + def assertCounterMetric(self, metric, expected): self.assertEqual({key[0] for key in self.recorded_metrics}, {metric.name}) actual = { @@ -133,6 +124,24 @@ def assertHistogramMetric(self, metric, expected): class TestPrometheusMetrics(_TestMetrics): provider_class = PrometheusMetrics + def test_histogram(self): + histogram = self.provider.histogram( + 'commcare.test.histogram', 'Description', 'duration', + buckets=[1, 2, 3], bucket_unit='ms', tag_names=['t1', 't2'] + ) + tagged_1 = histogram.tag(t1='a', t2='b') + tagged_1.observe(0.2) + tagged_1.observe(0.7) + tagged_1.observe(2.5) + + tagged_2 = histogram.tag(t1='c', t2='b') + tagged_2.observe(2) + tagged_2.observe(5) + self.assertHistogramMetric(histogram, { + (('t1', 'a'), ('t2', 'b')): {1: 2, 3: 1}, + (('t1', 'c'), ('t2', 'b')): {2: 1, INF: 1} + }) + def _samples_to_dics(self, samples, filter_name=None): """Convert a Sample tuple into a dict((name, (labels tuple)) -> value)""" return { From e00b7e176bb3c296f003d49d7e39354e5f3b1390 Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Mon, 16 Mar 2020 13:40:39 +0200 Subject: [PATCH 18/40] remove unnecessary list --- corehq/util/metrics/tests/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/corehq/util/metrics/tests/utils.py b/corehq/util/metrics/tests/utils.py index bbd9119e3113..3c012c0ff9d5 100644 --- a/corehq/util/metrics/tests/utils.py +++ b/corehq/util/metrics/tests/utils.py @@ -9,7 +9,7 @@ def patch_datadog(): def record(fn, name, value, tags=None): key = tuple([ name, - tuple(sorted([tuple(t.split(':')) for t in (tags or [])])), + tuple(sorted(tuple(t.split(':')) for t in (tags or []))), ]) stats[key].append(value) From 20e8a6172ebbef0560bdfd607041b25313a2bbcf Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Mon, 16 Mar 2020 13:44:35 +0200 Subject: [PATCH 19/40] apply tags at the same time as recording the metric This change means we don't have to instantiate a new class when applying the tags which simplifies the class structure a lot. 
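For example (an illustrative sketch based on the call sites changed below; corrupt_multimedia_counter is one of the module-level metrics in corehq/apps/receiverwrapper/views.py):

    # before: build a tagged instance, then record the value
    corrupt_multimedia_counter.tag(domain=domain, authenticated=authenticated).inc()

    # after: pass the tags as keyword arguments at record time
    corrupt_multimedia_counter.inc(domain=domain, authenticated=authenticated)

No intermediate tagged instance is created, so the metric classes no longer need to carry tag state.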
--- corehq/apps/api/odata/utils.py | 9 ++- corehq/apps/receiverwrapper/views.py | 10 ++-- corehq/util/metrics/datadog.py | 12 ++-- corehq/util/metrics/metrics.py | 68 ++++++++++------------- corehq/util/metrics/prometheus.py | 51 +++++++---------- corehq/util/metrics/tests/test_metrics.py | 42 +++++++------- 6 files changed, 83 insertions(+), 109 deletions(-) diff --git a/corehq/apps/api/odata/utils.py b/corehq/apps/api/odata/utils.py index 6992808fa2e4..7628a03eb8c7 100644 --- a/corehq/apps/api/odata/utils.py +++ b/corehq/apps/api/odata/utils.py @@ -2,8 +2,6 @@ from collections import namedtuple from corehq.apps.export.models import ExportInstance -from corehq.util.datadog.gauges import datadog_counter -from corehq.util.datadog.utils import bucket_value from corehq.util.metrics import metrics FieldMetadata = namedtuple('FieldMetadata', ['name', 'odata_type']) @@ -72,12 +70,13 @@ def record_feed_access_in_datadog(request, config_id, duration, response): column_count = len(rows[0]) except IndexError: column_count = 0 - odata_feed_access_histogram.tag( + odata_feed_access_histogram.observe( + duration, domain=request.domain, feed_id=config_id, feed_type=config.type, username=username, row_count=row_count, column_count=column_count, - size=len(response.content), - ).observe(duration) + size=len(response.content) + ) diff --git a/corehq/apps/receiverwrapper/views.py b/corehq/apps/receiverwrapper/views.py index 22e53df3c76d..16d28bf5a38b 100644 --- a/corehq/apps/receiverwrapper/views.py +++ b/corehq/apps/receiverwrapper/views.py @@ -106,7 +106,7 @@ def _process_form(request, domain, app_id, user_id, authenticated, except: meta = {} - corrupt_multimedia_counter.tag(domain=domain, authenticated=authenticated).inc() + corrupt_multimedia_counter.inc(domain=domain, authenticated=authenticated) return _submission_error( request, "Received a submission with POST.keys()", metric_tags, domain, app_id, user_id, authenticated, meta, @@ -154,7 +154,7 @@ def _process_form(request, domain, app_id, user_id, authenticated, try: result = submission_post.run() except XFormLockError as err: - xform_locked_error_counter.tag(domain=domain, authenticated=authenticated).inc() + xform_locked_error_counter.inc(domain=domain, authenticated=authenticated) return _submission_error( request, "XFormLockError: %s" % err, metric_tags, domain, app_id, user_id, authenticated, status=423, @@ -201,12 +201,12 @@ def _record_metrics(tags, submission_type, response, timer=None, xform=None): if xform and xform.metadata and xform.metadata.timeEnd and xform.received_on: lag = xform.received_on - xform.metadata.timeEnd lag_days = lag.total_seconds() / 86400 - submission_lag_histogram.tag(**tags).observe(lag_days) + submission_lag_histogram.observe(lag_days, **tags) if timer: - submission_duration_histogram.tag(**tags).observe(timer.duration) + submission_duration_histogram.observe(timer.duration, **tags) - submission_counter.tag(**tags).inc() + submission_counter.inc(**tags) @location_safe diff --git a/corehq/util/metrics/datadog.py b/corehq/util/metrics/datadog.py index e463cccfcd53..0163e5696924 100644 --- a/corehq/util/metrics/datadog.py +++ b/corehq/util/metrics/datadog.py @@ -36,14 +36,14 @@ def _datadog_record(fn, name, value, tags=None): class Counter(HqCounter): - def _record(self, amount: float): - tags = _format_tags(self.tag_values) + def _record(self, amount: float, tag_values: dict): + tags = _format_tags(tag_values) _datadog_record(statsd.increment, self.name, amount, tags) class Gauge(HqGauge): - def _record(self, value): 
- tags = _format_tags(self.tag_values) + def _record(self, value: float, tag_values: dict): + tags = _format_tags(tag_values) _datadog_record(statsd.gauge, self.name, value, tags) @@ -69,8 +69,8 @@ class Histogram(HqHistogram): * https://github.com/dimagi/commcare-hq/pull/17080 * https://github.com/dimagi/commcare-hq/pull/17030#issuecomment-315794700 """ - def _record(self, value: float): - tags = _format_tags(self.tag_values) + def _record(self, value: float, tag_values: dict): + tags = _format_tags(tag_values) if not tags: tags = [] bucket = bucket_value(value, self._buckets, self._bucket_unit) diff --git a/corehq/util/metrics/metrics.py b/corehq/util/metrics/metrics.py index 3d37368e4a40..f4a699145ace 100644 --- a/corehq/util/metrics/metrics.py +++ b/corehq/util/metrics/metrics.py @@ -33,53 +33,43 @@ def _validate_tag_names(tag_names): class MetricBase: - def __init__(self, name: str, documentation: str, tag_names: Iterable = (), **kwargs): + def __init__(self, name: str, documentation: str, tag_names: Iterable = ()): self.name = name if not METRIC_NAME_RE.match(name): raise ValueError('Invalid metric name: ' + name) _enforce_prefix(name, 'commcare') self.documentation = documentation self.tag_names = _validate_tag_names(tag_names) - self.tag_values = kwargs.pop('tag_values', None) - if self.tag_values: - assert isinstance(self.tag_values, dict) - if self.tag_values.keys() != self.tag_names: - raise ValueError('Incorrect tag names') - self._kwargs = kwargs self._init_metric() def _init_metric(self): pass - def tag(self, **tag_kwargs): - tag_kwargs = {t: str(v) for t, v in tag_kwargs.items()} - return self._get_tagged_instance(tag_kwargs) - - def _get_tagged_instance(self, tag_values: dict): - return self.__class__( - self.name, self.documentation, tag_names=self.tag_names, tag_values=tag_values, **self._kwargs - ) - - def _validate_tags(self): - if self.tag_names and not self.tag_values: + def _validate_tags(self, tag_values: dict): + if self.tag_names and not tag_values: raise Exception('Metric has missing tag values.') - def _record(self, value: float): + if tag_values: + assert isinstance(tag_values, dict) + if tag_values.keys() != self.tag_names: + raise ValueError('Incorrect tag names') + + def _record(self, value: float, tags: dict): raise NotImplementedError class HqCounter(MetricBase): - def inc(self, amount: float = 1): + def inc(self, amount: float = 1, **tags): """Increment the counter by the given amount.""" - self._validate_tags() - self._record(amount) + self._validate_tags(tags) + self._record(amount, tags) class HqGauge(MetricBase): - def set(self, value: float): + def set(self, value: float, **tags): """Set gauge to the given value.""" - self._validate_tags() - self._record(value) + self._validate_tags(tags) + self._record(value, tags) DEFAULT_BUCKETS = (.005, .01, .025, .05, .075, .1, .25, .5, .75, 1.0, 2.5, 5.0, 7.5, 10.0, INF) @@ -87,17 +77,20 @@ def set(self, value: float): class HqHistogram(MetricBase): - def _init_metric(self): - self._buckets = self._kwargs.get('buckets') or DEFAULT_BUCKETS - self._bucket_unit = self._kwargs.get('bucket_unit', '') - self._bucket_tag = self._kwargs.get('bucket_tag') - if self._bucket_tag in self.tag_names: - self.tag_names = tuple(name for name in self.tag_names if name != self._bucket_tag) - - def observe(self, value: float): + def __init__(self, name: str, documentation: str, + bucket_tag: str, buckets: List[int] = DEFAULT_BUCKETS, bucket_unit: str = '', + tag_names: Iterable = ()): + self._bucket_tag = bucket_tag + 
self._buckets = buckets + self._bucket_unit = bucket_unit + if self._bucket_tag in tag_names: + tag_names = tuple(name for name in tag_names if name != bucket_tag) + super().__init__(name, documentation, tag_names) + + def observe(self, value: float, **tags): """Update histogram with the given value.""" - self._validate_tags() - self._record(value) + self._validate_tags(tags) + self._record(value, tags) class HqMetrics(metaclass=abc.ABCMeta): @@ -122,7 +115,7 @@ def histogram(self, name: str, documentation: str, implementations for details. """ return self._histogram_class( - name, documentation, tag_names, bucket_tag=bucket_tag, buckets=buckets, bucket_unit=bucket_unit + name, documentation, bucket_tag, buckets=buckets, bucket_unit=bucket_unit, tag_names=tag_names ) @@ -160,9 +153,6 @@ def __init__(self, delegates, record_fn_name): self._delegates = delegates self._record_fn_name = record_fn_name - def tag(self, *args, **kwargs): - return self.__class__([d.tag(*args, **kwargs) for d in self._delegates], self._record_fn_name) - def __getattr__(self, item): if item == self._record_fn_name: def record(*args, **kwargs): diff --git a/corehq/util/metrics/prometheus.py b/corehq/util/metrics/prometheus.py index 604b02ae7aaf..cce9a0c671ed 100644 --- a/corehq/util/metrics/prometheus.py +++ b/corehq/util/metrics/prometheus.py @@ -11,54 +11,43 @@ from prometheus_client import Histogram as PHistogram -class PrometheusMetricBase(MetricBase): - _metric_class = None +class Counter(HqCounter): + """https://prometheus.io/docs/concepts/metric_types/#counter""" def _init_metric(self): - super()._init_metric() self.name = self.name.replace('.', '_') - delegate = self._kwargs.get('delegate') - self._delegate = delegate or self._metric_class(self.name, self.documentation, self.tag_names) - - def _get_tagged_instance(self, tag_values: dict): - delegate = self._delegate.labels(**tag_values) - return self.__class__( - self.name, self.documentation, - tag_names=self.tag_names, tag_values=tag_values, delegate=delegate, **self._kwargs - ) - - -class Counter(PrometheusMetricBase, HqCounter): - """https://prometheus.io/docs/concepts/metric_types/#counter""" - _metric_class = PCounter + self._delegate = PCounter(self.name, self.documentation, self.tag_names) - def _record(self, amount: float): - self._delegate.inc(amount) + def _record(self, amount: float, tags): + _get_labeled(self._delegate, tags).inc(amount) -class Gauge(PrometheusMetricBase, HqGauge): +class Gauge(HqGauge): """https://prometheus.io/docs/concepts/metric_types/#gauge""" - _metric_class = PGauge - def _record(self, value: float): - self._delegate.set(value) + def _init_metric(self): + self.name = self.name.replace('.', '_') + self._delegate = PGauge(self.name, self.documentation, self.tag_names) + + def _record(self, value: float, tags): + _get_labeled(self._delegate, tags).set(value) -class Histogram(PrometheusMetricBase, HqHistogram): +class Histogram(HqHistogram): """This metric class implements the native Prometheus Histogram type https://prometheus.io/docs/concepts/metric_types/#histogram """ def _init_metric(self): - """Overriding this so that we can pass in the buckets to the Prometheus class""" - HqHistogram._init_metric(self) # skip _init_metric on PrometheusMetricBase self.name = self.name.replace('.', '_') - self._delegate = self._kwargs.get('delegate') - if not self._delegate: - self._delegate = PHistogram(self.name, self.documentation, self.tag_names, buckets=self._buckets) + self._delegate = PHistogram(self.name, self.documentation, 
self.tag_names, buckets=self._buckets) + + def _record(self, value: float, tags: dict): + _get_labeled(self._delegate, tags).observe(value) + - def _record(self, value: float): - self._delegate.observe(value) +def _get_labeled(metric, labels): + return metric.labels(**labels) if labels else metric class PrometheusMetrics(HqMetrics): diff --git a/corehq/util/metrics/tests/test_metrics.py b/corehq/util/metrics/tests/test_metrics.py index ea6b878f6e8b..bb9c8232a00b 100644 --- a/corehq/util/metrics/tests/test_metrics.py +++ b/corehq/util/metrics/tests/test_metrics.py @@ -26,9 +26,9 @@ def setUpClass(cls): def test_counter(self): counter = self.provider.counter('commcare.test.counter', 'Description', tag_names=['t1', 't2']) - counter.tag(t1='a', t2='b').inc() - counter.tag(t1='c', t2='b').inc(2) - counter.tag(t1='c', t2='b').inc() + counter.inc(t1='a', t2='b') + counter.inc(2, t1='c', t2='b') + counter.inc(t1='c', t2='b') self.assertCounterMetric(counter, { (('t1', 'a'), ('t2', 'b')): 1, (('t1', 'c'), ('t2', 'b')): 3, @@ -36,9 +36,9 @@ def test_counter(self): def test_gauge(self): gauge = self.provider.gauge('commcare.test.gauge', 'Description', tag_names=['t1', 't2']) - gauge.tag(t1='a', t2='b').set(4.2) - gauge.tag(t1='c', t2='b').set(2) - gauge.tag(t1='c', t2='b').set(5) + gauge.set(4.2, t1='a', t2='b') + gauge.set(2, t1='c', t2='b') + gauge.set(5, t1='c', t2='b') self.assertGaugeMetric(gauge, { (('t1', 'a'), ('t2', 'b')): 4.2, (('t1', 'c'), ('t2', 'b')): 5, @@ -76,14 +76,12 @@ def test_histogram(self): 'commcare.test.histogram', 'Description', 'duration', buckets=[1, 2, 3], bucket_unit='ms', tag_names=['t1', 't2'] ) - tagged_1 = histogram.tag(t1='a', t2='b') - tagged_1.observe(0.2) - tagged_1.observe(0.7) - tagged_1.observe(2.5) - - tagged_2 = histogram.tag(t1='c', t2='b') - tagged_2.observe(2) - tagged_2.observe(5) + histogram.observe(0.2, t1='a', t2='b') + histogram.observe(0.7, t1='a', t2='b') + histogram.observe(2.5, t1='a', t2='b') + + histogram.observe(2, t1='c', t2='b') + histogram.observe(5, t1='c', t2='b') self.assertHistogramMetric(histogram, { (('t1', 'a'), ('t2', 'b')): {1: 2, 3: 1}, (('t1', 'c'), ('t2', 'b')): {3: 1, INF: 1} @@ -129,14 +127,12 @@ def test_histogram(self): 'commcare.test.histogram', 'Description', 'duration', buckets=[1, 2, 3], bucket_unit='ms', tag_names=['t1', 't2'] ) - tagged_1 = histogram.tag(t1='a', t2='b') - tagged_1.observe(0.2) - tagged_1.observe(0.7) - tagged_1.observe(2.5) - - tagged_2 = histogram.tag(t1='c', t2='b') - tagged_2.observe(2) - tagged_2.observe(5) + histogram.observe(0.2, t1='a', t2='b') + histogram.observe(0.7, t1='a', t2='b') + histogram.observe(2.5, t1='a', t2='b') + + histogram.observe(2, t1='c', t2='b') + histogram.observe(5, t1='c', t2='b') self.assertHistogramMetric(histogram, { (('t1', 'a'), ('t2', 'b')): {1: 2, 3: 1}, (('t1', 'c'), ('t2', 'b')): {2: 1, INF: 1} @@ -145,7 +141,7 @@ def test_histogram(self): def _samples_to_dics(self, samples, filter_name=None): """Convert a Sample tuple into a dict((name, (labels tuple)) -> value)""" return { - tuple(sample.labels.items()): sample.value + tuple(sorted(sample.labels.items())): sample.value for sample in samples if not filter_name or sample.name == filter_name } From 0ab0006ac2660650e5ca743eb669b5dc841a11e6 Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Mon, 16 Mar 2020 15:04:49 +0200 Subject: [PATCH 20/40] dummy metric --- corehq/util/metrics/metrics.py | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/corehq/util/metrics/metrics.py 
b/corehq/util/metrics/metrics.py index f4a699145ace..5abb6811f308 100644 --- a/corehq/util/metrics/metrics.py +++ b/corehq/util/metrics/metrics.py @@ -119,9 +119,20 @@ def histogram(self, name: str, documentation: str, ) +class DummyMetric: + def __init__(self, *args, **kwargs): + pass + + def __getattr__(self, item): + if item in ('inc', 'set', 'observe'): + return lambda *args, **kwargs: None + raise AttributeError + + class DummyMetrics(HqMetrics): - _counter_class = HqCounter - _gauge_class = HqGauge + _counter_class = DummyMetric + _gauge_class = DummyMetric + _histogram_class = DummyMetric def enabled(self) -> bool: return True From 02b40eeee7fec129c219d3819744fb8097e0d213 Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Mon, 16 Mar 2020 18:32:43 +0200 Subject: [PATCH 21/40] functional interface --- corehq/apps/api/odata/utils.py | 30 ++--- corehq/apps/receiverwrapper/views.py | 50 +++---- corehq/util/metrics/__init__.py | 38 ++++-- corehq/util/metrics/datadog.py | 106 +++++++-------- corehq/util/metrics/metrics.py | 151 +++++---------------- corehq/util/metrics/prometheus.py | 78 ++++------- corehq/util/metrics/tests/test_metrics.py | 155 ++++++++-------------- 7 files changed, 230 insertions(+), 378 deletions(-) diff --git a/corehq/apps/api/odata/utils.py b/corehq/apps/api/odata/utils.py index 7628a03eb8c7..cee2b6447997 100644 --- a/corehq/apps/api/odata/utils.py +++ b/corehq/apps/api/odata/utils.py @@ -2,7 +2,7 @@ from collections import namedtuple from corehq.apps.export.models import ExportInstance -from corehq.util.metrics import metrics +from corehq.util.metrics import metrics_histogram FieldMetadata = namedtuple('FieldMetadata', ['name', 'odata_type']) @@ -53,13 +53,6 @@ def _get_dot_path(export_column): return metadata -odata_feed_access_histogram = metrics.histogram( - 'commcare.odata_feed.test_v3', 'Odata Feed Access', - bucket_tag='duration_bucket', buckets=(1, 5, 20, 60, 120, 300, 600), bucket_unit='s', - tag_names=['domain', 'feed_id', 'feed_type', 'username', 'row_count', 'column_count', 'size'] -) - - def record_feed_access_in_datadog(request, config_id, duration, response): config = ExportInstance.get(config_id) username = request.couch_user.username @@ -70,13 +63,16 @@ def record_feed_access_in_datadog(request, config_id, duration, response): column_count = len(rows[0]) except IndexError: column_count = 0 - odata_feed_access_histogram.observe( - duration, - domain=request.domain, - feed_id=config_id, - feed_type=config.type, - username=username, - row_count=row_count, - column_count=column_count, - size=len(response.content) + metrics_histogram( + 'commcare.odata_feed.test_v3', duration, + bucket_tag='duration_bucket', buckets=(1, 5, 20, 60, 120, 300, 600), bucket_unit='s', + tags={ + 'domain': request.domain, + 'feed_id': config_id, + 'feed_type': config.type, + 'username': username, + 'row_count': row_count, + 'column_count': column_count, + 'size': len(response.content) + } ) diff --git a/corehq/apps/receiverwrapper/views.py b/corehq/apps/receiverwrapper/views.py index 16d28bf5a38b..00d00de4b8cb 100644 --- a/corehq/apps/receiverwrapper/views.py +++ b/corehq/apps/receiverwrapper/views.py @@ -10,7 +10,7 @@ import couchforms from casexml.apps.case.xform import get_case_updates, is_device_report -from corehq.util.metrics import metrics +from corehq.util.metrics import metrics, metrics_counter, metrics_histogram from couchforms import openrosa_response from couchforms.const import MAGIC_PROPERTY, BadRequest from couchforms.getters import MultimediaBug @@ -57,32 +57,6 
@@ PROFILE_LIMIT = os.getenv('COMMCARE_PROFILE_SUBMISSION_LIMIT') PROFILE_LIMIT = int(PROFILE_LIMIT) if PROFILE_LIMIT is not None else 1 -corrupt_multimedia_counter = metrics.counter( - 'commcare.corrupt_multimedia_submissions', 'Count of requests with corrupt multimedia', - tag_names=['domain', 'authenticated'] -) - -xform_locked_error_counter = metrics.counter( - 'commcare.xformlocked.count', 'Count of locking errors', - tag_names=['domain', 'authenticated'] -) - -submission_counter = metrics.counter( - 'commcare.xform_submissions.count', 'Count of form submissions', - tag_names=['domain', 'backend', 'submission_type', 'status_code'] -) - -submission_duration_histogram = metrics.histogram( - 'commcare.xform_submissions.duration.seconds', 'Count of form submissions', - bucket_tag='duration', buckets=(1, 5, 20, 60, 120, 300, 600), bucket_unit='s', - tag_names=['domain', 'backend', 'submission_type', 'status_code'] -) - -submission_lag_histogram = metrics.histogram( - 'commcare.xform_submissions.lag.days', 'Count of form submissions', - bucket_tag='lag', buckets=(1, 2, 4, 7, 14, 31, 90), bucket_unit='d', - tag_names=['domain', 'backend', 'submission_type', 'status_code'] -) @profile_prod('commcare_receiverwapper_process_form.prof', probability=PROFILE_PROBABILITY, limit=PROFILE_LIMIT) def _process_form(request, domain, app_id, user_id, authenticated, @@ -106,7 +80,9 @@ def _process_form(request, domain, app_id, user_id, authenticated, except: meta = {} - corrupt_multimedia_counter.inc(domain=domain, authenticated=authenticated) + metrics_counter('commcare.corrupt_multimedia_submissions', tags={ + 'domain': domain, 'authenticated': authenticated + }) return _submission_error( request, "Received a submission with POST.keys()", metric_tags, domain, app_id, user_id, authenticated, meta, @@ -154,7 +130,9 @@ def _process_form(request, domain, app_id, user_id, authenticated, try: result = submission_post.run() except XFormLockError as err: - xform_locked_error_counter.inc(domain=domain, authenticated=authenticated) + metrics_counter('commcare.xformlocked.count', tags={ + 'domain': domain, 'authenticated': authenticated + }) return _submission_error( request, "XFormLockError: %s" % err, metric_tags, domain, app_id, user_id, authenticated, status=423, @@ -201,12 +179,20 @@ def _record_metrics(tags, submission_type, response, timer=None, xform=None): if xform and xform.metadata and xform.metadata.timeEnd and xform.received_on: lag = xform.received_on - xform.metadata.timeEnd lag_days = lag.total_seconds() / 86400 - submission_lag_histogram.observe(lag_days, **tags) + metrics_histogram( + 'commcare.xform_submissions.lag.days', lag_days, + bucket_tag='lag', buckets=(1, 2, 4, 7, 14, 31, 90), bucket_unit='d', + tags=tags + ) if timer: - submission_duration_histogram.observe(timer.duration, **tags) + metrics_histogram( + 'commcare.xform_submissions.duration.seconds', timer.duration, + bucket_tag='duration', buckets=(1, 5, 20, 60, 120, 300, 600), bucket_unit='s', + tags=tags + ) - submission_counter.inc(**tags) + metrics_counter('commcare.xform_submissions.count', tags=tags) @location_safe diff --git a/corehq/util/metrics/__init__.py b/corehq/util/metrics/__init__.py index 1e6a6053ea49..937a8f483efb 100644 --- a/corehq/util/metrics/__init__.py +++ b/corehq/util/metrics/__init__.py @@ -1,15 +1,37 @@ -from django.utils.functional import SimpleLazyObject +from typing import List, Iterable from corehq.util.metrics.datadog import DatadogMetrics -from corehq.util.metrics.metrics import DummyMetrics, 
DelegatedMetrics +from corehq.util.metrics.metrics import DummyMetrics, DelegatedMetrics, DEFAULT_BUCKETS from corehq.util.metrics.prometheus import PrometheusMetrics +_metrics = None -def _get_metrics(): - enabled = list(filter(lambda m: m.enabled(), [DatadogMetrics(), PrometheusMetrics()])) - if not enabled: - return [DummyMetrics()] - return enabled +def _get_metrics_provider(): + global _metrics + if not _metrics: + enabled = list(filter(lambda m: m.enabled(), [DatadogMetrics(), PrometheusMetrics()])) + if not enabled: + _metrics = DummyMetrics() + elif len(enabled) > 1: + _metrics = DelegatedMetrics(enabled) + else: + _metrics = enabled[0] + return _metrics -metrics = DelegatedMetrics(SimpleLazyObject(_get_metrics)) # singleton/global + +def metrics_counter(name: str, value: float = 1, tags: dict = None, documentation: str = ''): + provider = _get_metrics_provider() + provider.counter(name, value, tags, documentation) + + +def metrics_gauge(name: str, value: float, tags: dict = None, documentation: str = ''): + provider = _get_metrics_provider() + provider.gauge(name, value, tags, documentation) + + +def metrics_histogram(name: str, value: float, + bucket_tag: str, buckets: Iterable[int] = DEFAULT_BUCKETS, bucket_unit: str = '', + tags: dict = None, documentation: str = ''): + provider = _get_metrics_provider() + provider.histogram(name, value, bucket_tag, buckets, bucket_unit, tags, documentation) diff --git a/corehq/util/metrics/datadog.py b/corehq/util/metrics/datadog.py index 0163e5696924..a2808f72195f 100644 --- a/corehq/util/metrics/datadog.py +++ b/corehq/util/metrics/datadog.py @@ -1,14 +1,10 @@ import logging +from typing import List from django.conf import settings from corehq.util.datadog.utils import bucket_value -from corehq.util.metrics.metrics import ( - HqCounter, - HqGauge, - HqHistogram, - HqMetrics, -) +from corehq.util.metrics.metrics import HqMetrics from datadog import api from datadog.dogstatsd.base import DogStatsd @@ -19,6 +15,52 @@ statsd = DogStatsd(constant_tags=COMMON_TAGS) +class DatadogMetrics(HqMetrics): + def validate(self): + if not (api._api_key and api._application_key): + raise Exception("Datadog not configured. Set DATADOG_API_KEY and DATADOG_APP_KEY in settings.") + + def _counter(self, name: str, value: float, tags: dict = None, documentation: str = ''): + dd_tags = _format_tags(tags) + _datadog_record(statsd.increment, name, value, dd_tags) + + def _gauge(self, name: str, value: float, tags: dict = None, documentation: str = ''): + dd_tags = _format_tags(tags) + _datadog_record(statsd.gauge, name, value, dd_tags) + + def _histogram(self, name: str, value: float, + bucket_tag: str, buckets: List[int], bucket_unit: str = '', + tags: dict = None, documentation: str = ''): + """ + This implementation of histogram uses tagging to record the buckets. + It does not use the Datadog Histogram metric type. + + The metric itself will be incremented by 1 on each call to `observe`. The value + passed to `observe` will be used to create the bucket tag. 
+ + For example: + + h = Histogram( + 'commcare.request.duration', 'description', + bucket_tag='duration', buckets=[1,2,3], bucket_units='ms' + ) + h.observe(1.4) + + # resulting Datadog metric + # commcare.request.duration:1|c|#duration:lt_2ms + + For more details see: + * https://github.com/dimagi/commcare-hq/pull/17080 + * https://github.com/dimagi/commcare-hq/pull/17030#issuecomment-315794700 + """ + tags = _format_tags(tags) + if not tags: + tags = [] + bucket = bucket_value(value, buckets, bucket_unit) + tags.append(f'{bucket_tag}:{bucket}') + _datadog_record(statsd.increment, name, 1, tags) + + def _format_tags(tag_values: dict): if not tag_values: return None @@ -33,55 +75,3 @@ def _datadog_record(fn, name, value, tags=None): fn(name, value, tags=tags) except Exception: datadog_logger.exception('Unable to record Datadog stats') - - -class Counter(HqCounter): - def _record(self, amount: float, tag_values: dict): - tags = _format_tags(tag_values) - _datadog_record(statsd.increment, self.name, amount, tags) - - -class Gauge(HqGauge): - def _record(self, value: float, tag_values: dict): - tags = _format_tags(tag_values) - _datadog_record(statsd.gauge, self.name, value, tags) - - -class Histogram(HqHistogram): - """This implementation of histogram uses tagging to record the buckets. - It does not use the Datadog Histogram metric type. - - The metric itself will be incremented by 1 on each call to `observe`. The value - passed to `observe` will be used to create the bucket tag. - - For example: - - h = Histogram( - 'commcare.request.duration', 'description', - bucket_tag='duration', buckets=[1,2,3], bucket_units='ms' - ) - h.observe(1.4) - - # resulting Datadog metric - # commcare.request.duration:1|c|#duration:lt_2ms - - For more details see: - * https://github.com/dimagi/commcare-hq/pull/17080 - * https://github.com/dimagi/commcare-hq/pull/17030#issuecomment-315794700 - """ - def _record(self, value: float, tag_values: dict): - tags = _format_tags(tag_values) - if not tags: - tags = [] - bucket = bucket_value(value, self._buckets, self._bucket_unit) - tags.append(f'{self._bucket_tag}:{bucket}') - _datadog_record(statsd.increment, self.name, 1, tags) - - -class DatadogMetrics(HqMetrics): - _counter_class = Counter - _gauge_class = Gauge - _histogram_class = Histogram - - def enabled(self) -> bool: - return api._api_key and api._application_key diff --git a/corehq/util/metrics/metrics.py b/corehq/util/metrics/metrics.py index 5abb6811f308..e407ce935e45 100644 --- a/corehq/util/metrics/metrics.py +++ b/corehq/util/metrics/metrics.py @@ -32,143 +32,64 @@ def _validate_tag_names(tag_names): return tag_names -class MetricBase: - def __init__(self, name: str, documentation: str, tag_names: Iterable = ()): - self.name = name - if not METRIC_NAME_RE.match(name): - raise ValueError('Invalid metric name: ' + name) - _enforce_prefix(name, 'commcare') - self.documentation = documentation - self.tag_names = _validate_tag_names(tag_names) - self._init_metric() - - def _init_metric(self): - pass - - def _validate_tags(self, tag_values: dict): - if self.tag_names and not tag_values: - raise Exception('Metric has missing tag values.') - - if tag_values: - assert isinstance(tag_values, dict) - if tag_values.keys() != self.tag_names: - raise ValueError('Incorrect tag names') - - def _record(self, value: float, tags: dict): - raise NotImplementedError - - -class HqCounter(MetricBase): - def inc(self, amount: float = 1, **tags): - """Increment the counter by the given amount.""" - self._validate_tags(tags) 
- self._record(amount, tags) - - -class HqGauge(MetricBase): - def set(self, value: float, **tags): - """Set gauge to the given value.""" - self._validate_tags(tags) - self._record(value, tags) - - DEFAULT_BUCKETS = (.005, .01, .025, .05, .075, .1, .25, .5, .75, 1.0, 2.5, 5.0, 7.5, 10.0, INF) -class HqHistogram(MetricBase): - - def __init__(self, name: str, documentation: str, - bucket_tag: str, buckets: List[int] = DEFAULT_BUCKETS, bucket_unit: str = '', - tag_names: Iterable = ()): - self._bucket_tag = bucket_tag - self._buckets = buckets - self._bucket_unit = bucket_unit - if self._bucket_tag in tag_names: - tag_names = tuple(name for name in tag_names if name != bucket_tag) - super().__init__(name, documentation, tag_names) - - def observe(self, value: float, **tags): - """Update histogram with the given value.""" - self._validate_tags(tags) - self._record(value, tags) - - class HqMetrics(metaclass=abc.ABCMeta): - _counter_class = None - _gauge_class = None - _histogram_class = None - - @abstractmethod - def enabled(self) -> bool: - raise NotImplementedError + def validate(self): + pass - def counter(self, name: str, documentation: str, tag_names: Iterable = ()) -> HqCounter: - return self._counter_class(name, documentation, tag_names) + def counter(self, name: str, value: float = 1, tags: dict = None, documentation: str = ''): + _enforce_prefix(name, 'commcare') + _validate_tag_names(tags) + self._counter(name, value, tags, documentation) - def gauge(self, name: str, documentation: str, tag_names: Iterable = ()) -> HqGauge: - return self._gauge_class(name, documentation, tag_names) + def gauge(self, name: str, value: float, tags: dict = None, documentation: str = ''): + _enforce_prefix(name, 'commcare') + _validate_tag_names(tags) + self._gauge(name, value, tags, documentation) - def histogram(self, name: str, documentation: str, + def histogram(self, name: str, value: float, bucket_tag: str, buckets: List[int] = DEFAULT_BUCKETS, bucket_unit: str = '', - tag_names: Iterable = ()) -> HqHistogram: + tags: dict = None, documentation: str = ''): """Create a histogram metric. Histogram implementations differ between provider. See provider implementations for details. 
""" - return self._histogram_class( - name, documentation, bucket_tag, buckets=buckets, bucket_unit=bucket_unit, tag_names=tag_names - ) + _enforce_prefix(name, 'commcare') + _validate_tag_names(tags) + self._histogram(name, value, bucket_tag, buckets, bucket_unit, tags, documentation) + @abstractmethod + def _counter(self, name, value, tags, documentation): + raise NotImplementedError -class DummyMetric: - def __init__(self, *args, **kwargs): + @abstractmethod + def _gauge(self, name, value, tags, documentation): pass - def __getattr__(self, item): - if item in ('inc', 'set', 'observe'): - return lambda *args, **kwargs: None - raise AttributeError - + @abstractmethod + def _histogram(self, name, value, bucket_tag, buckets, bucket_unit, tags, documentation): + pass -class DummyMetrics(HqMetrics): - _counter_class = DummyMetric - _gauge_class = DummyMetric - _histogram_class = DummyMetric - def enabled(self) -> bool: - return True +class DummyMetrics: + def __getattr__(self, item): + if item in ('counter', 'gauge', 'histogram'): + def _check(name, documentation, tags, *args, **kwargs): + _enforce_prefix(name, 'commcare') + _validate_tag_names(tags) + return _check + raise AttributeError class DelegatedMetrics: - """This class makes the metric class instantiation lazy and - also multiple metrics providers to be used.""" def __init__(self, delegates): self.delegates = delegates - self._types = { - 'counter': 'inc', - 'gauge': 'set', - 'histogram': 'observe', - } - - def __getattr__(self, item): - if item in self._types: - def _make_type(*args, **kwargs): - return SimpleLazyObject(lambda: DelegatingMetric([ - getattr(d, item)(*args, **kwargs) for d in self.delegates - ], self._types[item])) - return _make_type - raise AttributeError - - -class DelegatingMetric: - def __init__(self, delegates, record_fn_name): - self._delegates = delegates - self._record_fn_name = record_fn_name def __getattr__(self, item): - if item == self._record_fn_name: - def record(*args, **kwargs): - for metric in self._delegates: - getattr(metric, item)(*args, **kwargs) - return record - + if item in ('counter', 'gauge', 'histogram'): + def _record_metric(*args, **kwargs): + for delegate in self.delegates: + getattr(delegate, item)(*args, **kwargs) + return _record_metric raise AttributeError diff --git a/corehq/util/metrics/prometheus.py b/corehq/util/metrics/prometheus.py index cce9a0c671ed..ecb757dcc0e3 100644 --- a/corehq/util/metrics/prometheus.py +++ b/corehq/util/metrics/prometheus.py @@ -1,59 +1,33 @@ -import settings -from corehq.util.metrics.metrics import ( - HqCounter, - HqGauge, - HqHistogram, - HqMetrics, - MetricBase, -) +from typing import List + from prometheus_client import Counter as PCounter from prometheus_client import Gauge as PGauge from prometheus_client import Histogram as PHistogram - -class Counter(HqCounter): - """https://prometheus.io/docs/concepts/metric_types/#counter""" - - def _init_metric(self): - self.name = self.name.replace('.', '_') - self._delegate = PCounter(self.name, self.documentation, self.tag_names) - - def _record(self, amount: float, tags): - _get_labeled(self._delegate, tags).inc(amount) - - -class Gauge(HqGauge): - """https://prometheus.io/docs/concepts/metric_types/#gauge""" - - def _init_metric(self): - self.name = self.name.replace('.', '_') - self._delegate = PGauge(self.name, self.documentation, self.tag_names) - - def _record(self, value: float, tags): - _get_labeled(self._delegate, tags).set(value) - - -class Histogram(HqHistogram): - """This metric class 
implements the native Prometheus Histogram type - - https://prometheus.io/docs/concepts/metric_types/#histogram - """ - def _init_metric(self): - self.name = self.name.replace('.', '_') - self._delegate = PHistogram(self.name, self.documentation, self.tag_names, buckets=self._buckets) - - def _record(self, value: float, tags: dict): - _get_labeled(self._delegate, tags).observe(value) - - -def _get_labeled(metric, labels): - return metric.labels(**labels) if labels else metric +from corehq.util.metrics.metrics import HqMetrics class PrometheusMetrics(HqMetrics): - _counter_class = Counter - _gauge_class = Gauge - _histogram_class = Histogram - - def enabled(self) -> bool: - return settings.ENABLE_PROMETHEUS_METRICS + def __init__(self): + self._metrics = {} + + def _counter(self, name: str, value: float = 1, tags: dict = None, documentation: str = ''): + self._get_metric(PCounter, name, tags, documentation).inc(value) + + def _gauge(self, name: str, value: float, tags: dict = None, documentation: str = ''): + self._get_metric(PGauge, name, tags, documentation).set(value) + + def _histogram(self, name: str, value: float, bucket_tag: str, buckets: List[int], bucket_unit: str = '', + tags: dict = None, documentation: str = ''): + self._get_metric(PHistogram, name, tags, documentation, buckets=buckets).observe(value) + + def _get_metric(self, metric_type, name, tags, documentation, **kwargs): + name = name.replace('.', '_') + metric = self._metrics.get(name) + if not metric: + tags = tags or {} + metric = metric_type(name, documentation, labelnames=tags.keys(), **kwargs) + self._metrics[name] = metric + else: + assert metric.__class__ == metric_type + return metric.labels(**tags) if tags else metric diff --git a/corehq/util/metrics/tests/test_metrics.py b/corehq/util/metrics/tests/test_metrics.py index bb9c8232a00b..920fb52bcacb 100644 --- a/corehq/util/metrics/tests/test_metrics.py +++ b/corehq/util/metrics/tests/test_metrics.py @@ -6,9 +6,6 @@ from corehq.util.metrics import DatadogMetrics, PrometheusMetrics from corehq.util.metrics.metrics import ( DelegatedMetrics, - HqCounter, - HqGauge, - HqHistogram, ) from corehq.util.metrics.tests.utils import patch_datadog from prometheus_client.samples import Sample @@ -25,33 +22,31 @@ def setUpClass(cls): cls.provider = cls.provider_class() def test_counter(self): - counter = self.provider.counter('commcare.test.counter', 'Description', tag_names=['t1', 't2']) - counter.inc(t1='a', t2='b') - counter.inc(2, t1='c', t2='b') - counter.inc(t1='c', t2='b') - self.assertCounterMetric(counter, { + self.provider.counter('commcare.test.counter', tags={'t1': 'a', 't2': 'b'}) + self.provider.counter('commcare.test.counter', 2, tags={'t1': 'c', 't2': 'b'}) + self.provider.counter('commcare.test.counter', tags={'t1': 'c', 't2': 'b'}) + self.assertCounterMetric('commcare.test.counter', { (('t1', 'a'), ('t2', 'b')): 1, (('t1', 'c'), ('t2', 'b')): 3, }) def test_gauge(self): - gauge = self.provider.gauge('commcare.test.gauge', 'Description', tag_names=['t1', 't2']) - gauge.set(4.2, t1='a', t2='b') - gauge.set(2, t1='c', t2='b') - gauge.set(5, t1='c', t2='b') - self.assertGaugeMetric(gauge, { + self.provider.gauge('commcare.test.gauge', 4.2, tags={'t1': 'a', 't2': 'b'}) + self.provider.gauge('commcare.test.gauge', 2, tags={'t1': 'c', 't2': 'b'}) + self.provider.gauge('commcare.test.gauge', 5, tags={'t1': 'c', 't2': 'b'}) + self.assertGaugeMetric('commcare.test.gauge', { (('t1', 'a'), ('t2', 'b')): 4.2, (('t1', 'c'), ('t2', 'b')): 5, }) - def 
assertCounterMetric(self, metric: HqCounter, expected: Dict[Tuple[Tuple[str, str], ...], float]): + def assertCounterMetric(self, metric: str, expected: Dict[Tuple[Tuple[str, str], ...], float]): """ :param metric: metric class :param expected: dict mapping tag tuples to metric values """ raise NotImplementedError - def assertGaugeMetric(self, metric: HqGauge, expected: Dict[Tuple[Tuple[str, str], ...], float]): + def assertGaugeMetric(self, metric: str, expected: Dict[Tuple[Tuple[str, str], ...], float]): """ :param metric: metric class :param expected: dict mapping tag tuples to metric values @@ -72,46 +67,46 @@ def tearDown(self) -> None: super().tearDown() def test_histogram(self): - histogram = self.provider.histogram( - 'commcare.test.histogram', 'Description', 'duration', - buckets=[1, 2, 3], bucket_unit='ms', tag_names=['t1', 't2'] - ) - histogram.observe(0.2, t1='a', t2='b') - histogram.observe(0.7, t1='a', t2='b') - histogram.observe(2.5, t1='a', t2='b') - - histogram.observe(2, t1='c', t2='b') - histogram.observe(5, t1='c', t2='b') - self.assertHistogramMetric(histogram, { + for value in (0.2, 0.7, 2.5): + self.provider.histogram( + 'commcare.test.histogram', value, 'duration', + buckets=[1, 2, 3], bucket_unit='ms', tags={'t1': 'a', 't2': 'b'} + ) + for value in (2, 5): + self.provider.histogram( + 'commcare.test.histogram', value, 'duration', + buckets=[1, 2, 3], bucket_unit='ms', tags={'t1': 'c', 't2': 'b'} + ) + self.assertHistogramMetric('commcare.test.histogram', { (('t1', 'a'), ('t2', 'b')): {1: 2, 3: 1}, (('t1', 'c'), ('t2', 'b')): {3: 1, INF: 1} - }) + }, 'duration', [1, 2, 3], 'ms') - def assertCounterMetric(self, metric, expected): - self.assertEqual({key[0] for key in self.recorded_metrics}, {metric.name}) + def assertCounterMetric(self, metric_name, expected): + self.assertEqual({key[0] for key in self.recorded_metrics}, {metric_name}) actual = { key[1]: sum(val) for key, val in self.recorded_metrics.items() } self.assertDictEqual(actual, expected) - def assertGaugeMetric(self, metric, expected): - self.assertEqual({key[0] for key in self.recorded_metrics}, {metric.name}) + def assertGaugeMetric(self, metric_name, expected): + self.assertEqual({key[0] for key in self.recorded_metrics}, {metric_name}) actual = { key[1]: val[-1] for key, val in self.recorded_metrics.items() } self.assertDictEqual(actual, expected) - def assertHistogramMetric(self, metric, expected): - self.assertEqual({key[0] for key in self.recorded_metrics}, {metric.name}) + def assertHistogramMetric(self, metric_name, expected, bucket_tag, buckets, bucket_unit): + self.assertEqual({key[0] for key in self.recorded_metrics}, {metric_name}) expected_samples = {} - for tags, buckets in expected.items(): - for bucket, val in buckets.items(): + for tags, expected_buckets in expected.items(): + for bucket, val in expected_buckets.items(): prefix = 'lt' if bucket == INF: - bucket = metric._buckets[-1] + bucket = buckets[-1] prefix = 'over' - bucket_tag = (metric._bucket_tag, f'{prefix}_{bucket:03d}{metric._bucket_unit}') - expected_samples[tuple(sorted(tags + (bucket_tag,)))] = val + dd_bucket_tag = (bucket_tag, f'{prefix}_{bucket:03d}{bucket_unit}') + expected_samples[tuple(sorted(tags + (dd_bucket_tag,)))] = val actual = { key[1]: sum(val) for key, val in self.recorded_metrics.items() @@ -123,20 +118,20 @@ class TestPrometheusMetrics(_TestMetrics): provider_class = PrometheusMetrics def test_histogram(self): - histogram = self.provider.histogram( - 'commcare.test.histogram', 'Description', 'duration', - 
buckets=[1, 2, 3], bucket_unit='ms', tag_names=['t1', 't2'] - ) - histogram.observe(0.2, t1='a', t2='b') - histogram.observe(0.7, t1='a', t2='b') - histogram.observe(2.5, t1='a', t2='b') - - histogram.observe(2, t1='c', t2='b') - histogram.observe(5, t1='c', t2='b') - self.assertHistogramMetric(histogram, { + for value in (0.2, 0.7, 2.5): + self.provider.histogram( + 'commcare_test_histogram', value, 'duration', + buckets=[1, 2, 3], bucket_unit='ms', tags={'t1': 'a', 't2': 'b'} + ) + for value in (2, 5): + self.provider.histogram( + 'commcare_test_histogram', value, 'duration', + buckets=[1, 2, 3], bucket_unit='ms', tags={'t1': 'c', 't2': 'b'} + ) + self.assertHistogramMetric('commcare_test_histogram', { (('t1', 'a'), ('t2', 'b')): {1: 2, 3: 1}, (('t1', 'c'), ('t2', 'b')): {2: 1, INF: 1} - }) + }, [1, 2, 3]) def _samples_to_dics(self, samples, filter_name=None): """Convert a Sample tuple into a dict((name, (labels tuple)) -> value)""" @@ -146,27 +141,32 @@ def _samples_to_dics(self, samples, filter_name=None): if not filter_name or sample.name == filter_name } - def assertGaugeMetric(self, metric, expected): - [collected] = metric._delegate.collect() + def assertGaugeMetric(self, metric_name, expected): + metric_name = metric_name.replace('.', '_') + metric = self.provider._metrics[metric_name] + [collected] = metric.collect() actual = self._samples_to_dics(collected.samples) self.assertDictEqual(actual, expected) - def assertCounterMetric(self, metric, expected): - total_name = f'{metric.name}_total' - [collected] = metric._delegate.collect() + def assertCounterMetric(self, metric_name, expected): + metric_name = metric_name.replace('.', '_') + metric = self.provider._metrics[metric_name] + total_name = f'{metric_name}_total' + [collected] = metric.collect() actual = self._samples_to_dics(collected.samples, total_name) self.assertDictEqual(actual, expected) - def assertHistogramMetric(self, metric, expected): + def assertHistogramMetric(self, metric_name, expected, buckets): # Note that Prometheus histograms are cumulative so we must sum up the successive bucket values # https://en.wikipedia.org/wiki/Histogram#Cumulative_histogram - [collected] = metric._delegate.collect() + metric = self.provider._metrics[metric_name] + [collected] = metric.collect() - sample_name = f'{metric.name}_bucket' + sample_name = f'{metric_name}_bucket' expected_samples = [] for key, value in expected.items(): cumulative_value = 0 - for bucket in metric._buckets: + for bucket in buckets: val = value.get(bucket, 0) cumulative_value += val labels = dict(key + (('le', str(float(bucket))),)) @@ -181,40 +181,3 @@ def assertHistogramMetric(self, metric, expected): if s.name.endswith('bucket') ] self.assertListEqual(actual, expected_samples) - - -def test_delegate_lazy(): - metrics = DelegatedMetrics([DatadogMetrics(), PrometheusMetrics()]) - - def _check(metric): - assert isinstance(metric, SimpleLazyObject), '' - - test_cases = [ - metrics.counter('commcare.name.1', ''), - metrics.gauge('commcare.name.2', ''), - metrics.histogram('commcare.name.3', '', 'duration'), - ] - for metric in test_cases: - yield _check, metric - - -def test_lazy_recording(): - metrics = DelegatedMetrics([DatadogMetrics(), PrometheusMetrics()]) - - def _check(metric, method_name): - with patch_datadog() as stats: - getattr(metric, method_name)(1) - - dd_metric, prom_metric = metric._delegates - [collected] = prom_metric._delegate.collect() - - eq(len(stats), 1, stats) - eq(len(collected.samples) >= 1, True, collected.samples) - - test_cases = 
[ - (metrics.counter('commcare.name.1', ''), 'inc'), - (metrics.gauge('commcare.name.2', ''), 'set'), - (metrics.histogram('commcare.name.3', '', 'duration'), 'observe'), - ] - for metric, method_name in test_cases: - yield _check, metric, method_name From 31cfba91fa4092d307bed740fed1e7c9c35506ed Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Tue, 17 Mar 2020 10:48:06 +0200 Subject: [PATCH 22/40] re-do configuration via settings --- corehq/util/metrics/__init__.py | 17 ++++++++++++----- settings.py | 6 +++++- 2 files changed, 17 insertions(+), 6 deletions(-) diff --git a/corehq/util/metrics/__init__.py b/corehq/util/metrics/__init__.py index 937a8f483efb..6ac72792915c 100644 --- a/corehq/util/metrics/__init__.py +++ b/corehq/util/metrics/__init__.py @@ -1,8 +1,10 @@ from typing import List, Iterable +import settings from corehq.util.metrics.datadog import DatadogMetrics from corehq.util.metrics.metrics import DummyMetrics, DelegatedMetrics, DEFAULT_BUCKETS from corehq.util.metrics.prometheus import PrometheusMetrics +from dimagi.utils.modules import to_function _metrics = None @@ -10,13 +12,18 @@ def _get_metrics_provider(): global _metrics if not _metrics: - enabled = list(filter(lambda m: m.enabled(), [DatadogMetrics(), PrometheusMetrics()])) - if not enabled: + providers = [] + for provider_path in settings.METRICS_PROVIDERS: + provider = to_function(provider_path) + provider.validate() + providers.append(provider) + + if not providers: _metrics = DummyMetrics() - elif len(enabled) > 1: - _metrics = DelegatedMetrics(enabled) + elif len(providers) > 1: + _metrics = DelegatedMetrics(providers) else: - _metrics = enabled[0] + _metrics = providers[0] return _metrics diff --git a/settings.py b/settings.py index 05431414ab78..20ec3f4005ea 100755 --- a/settings.py +++ b/settings.py @@ -836,9 +836,13 @@ SUBSCRIPTION_USERNAME = None SUBSCRIPTION_PASSWORD = None +# List of metrics providers to use. 
Available providers: +# * 'corehq.util.metrics.datadog.DatadogMetrics' +# * 'corehq.util.metrics.prometheus.PrometheusMetrics' +METRICS_PROVIDERS = [] + DATADOG_API_KEY = None DATADOG_APP_KEY = None -ENABLE_PROMETHEUS_METRICS = False SYNCLOGS_SQL_DB_ALIAS = 'default' From 892e57bea62eac820a21fc5f1082dec4013ec8d9 Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Tue, 17 Mar 2020 10:51:24 +0200 Subject: [PATCH 23/40] move initialization into provider --- corehq/util/datadog/apps.py | 27 --------------------------- corehq/util/metrics/__init__.py | 2 +- corehq/util/metrics/datadog.py | 18 ++++++++++++++++-- corehq/util/metrics/metrics.py | 2 +- settings.py | 1 - 5 files changed, 18 insertions(+), 32 deletions(-) delete mode 100644 corehq/util/datadog/apps.py diff --git a/corehq/util/datadog/apps.py b/corehq/util/datadog/apps.py deleted file mode 100644 index 032d76e978d5..000000000000 --- a/corehq/util/datadog/apps.py +++ /dev/null @@ -1,27 +0,0 @@ -from django.apps import AppConfig -from django.conf import settings - - -class DatadogConfig(AppConfig): - - name = 'corehq.util.datadog' - verbose_name = 'Datadog' - - def ready(self): - if not settings.DATADOG_API_KEY or not settings.DATADOG_APP_KEY: - return - - try: - from datadog import initialize - except ImportError: - pass - else: - initialize(settings.DATADOG_API_KEY, settings.DATADOG_APP_KEY) - - if settings.UNIT_TESTING or settings.DEBUG or 'ddtrace.contrib.django' not in settings.INSTALLED_APPS: - try: - from ddtrace import tracer - tracer.enabled = False - except ImportError: - pass - diff --git a/corehq/util/metrics/__init__.py b/corehq/util/metrics/__init__.py index 6ac72792915c..4d34b1e48822 100644 --- a/corehq/util/metrics/__init__.py +++ b/corehq/util/metrics/__init__.py @@ -15,7 +15,7 @@ def _get_metrics_provider(): providers = [] for provider_path in settings.METRICS_PROVIDERS: provider = to_function(provider_path) - provider.validate() + provider.initialize() providers.append(provider) if not providers: diff --git a/corehq/util/metrics/datadog.py b/corehq/util/metrics/datadog.py index a2808f72195f..09efc8dc8524 100644 --- a/corehq/util/metrics/datadog.py +++ b/corehq/util/metrics/datadog.py @@ -16,10 +16,24 @@ class DatadogMetrics(HqMetrics): - def validate(self): - if not (api._api_key and api._application_key): + def initialize(self): + if not settings.DATADOG_API_KEY or not settings.DATADOG_APP_KEY: raise Exception("Datadog not configured. 
Set DATADOG_API_KEY and DATADOG_APP_KEY in settings.") + try: + from datadog import initialize + except ImportError: + pass + else: + initialize(settings.DATADOG_API_KEY, settings.DATADOG_APP_KEY) + + if settings.UNIT_TESTING or settings.DEBUG or 'ddtrace.contrib.django' not in settings.INSTALLED_APPS: + try: + from ddtrace import tracer + tracer.enabled = False + except ImportError: + pass + def _counter(self, name: str, value: float, tags: dict = None, documentation: str = ''): dd_tags = _format_tags(tags) _datadog_record(statsd.increment, name, value, dd_tags) diff --git a/corehq/util/metrics/metrics.py b/corehq/util/metrics/metrics.py index e407ce935e45..8b28203692cf 100644 --- a/corehq/util/metrics/metrics.py +++ b/corehq/util/metrics/metrics.py @@ -36,7 +36,7 @@ def _validate_tag_names(tag_names): class HqMetrics(metaclass=abc.ABCMeta): - def validate(self): + def initialize(self): pass def counter(self, name: str, value: float = 1, tags: dict = None, documentation: str = ''): diff --git a/settings.py b/settings.py index 20ec3f4005ea..8541637ad31c 100755 --- a/settings.py +++ b/settings.py @@ -340,7 +340,6 @@ 'corehq.motech.openmrs', 'corehq.motech.repeaters', 'corehq.util', - 'corehq.util.datadog.apps.DatadogConfig', 'dimagi.ext', 'corehq.blobs', 'corehq.apps.case_search', From dbdbd3fed490e54967e944125082f97ed5465ea9 Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Tue, 17 Mar 2020 11:43:55 +0200 Subject: [PATCH 24/40] replace datadog_gauge --- corehq/apps/case_importer/tasks.py | 9 ++------ corehq/apps/hqwebapp/tasks.py | 5 +++-- corehq/apps/sms/tasks.py | 5 +++-- corehq/motech/repeaters/tasks.py | 4 ++-- corehq/util/datadog/gauges.py | 34 ------------------------------ corehq/util/metrics/metrics.py | 25 ++++++++++++++++++++++ 6 files changed, 35 insertions(+), 47 deletions(-) diff --git a/corehq/apps/case_importer/tasks.py b/corehq/apps/case_importer/tasks.py index 14d63356b5c6..e0e4202a0ada 100644 --- a/corehq/apps/case_importer/tasks.py +++ b/corehq/apps/case_importer/tasks.py @@ -1,21 +1,16 @@ -from celery import states -from celery.exceptions import Ignore from celery.schedules import crontab from celery.task import task -from soil.progress import update_task_state - from corehq.apps.hqadmin.tasks import ( AbnormalUsageAlert, send_abnormal_usage_alert, ) -from corehq.util.datadog.gauges import datadog_gauge_task - from .do_import import do_import from .exceptions import ImporterError from .tracking.analytics import get_case_upload_files_total_bytes from .tracking.case_upload_tracker import CaseUpload from .util import get_importer_error_message, exit_celery_with_error_message +from ...util.metrics.metrics import metrics_gauge_task @task(serializer='pickle', queue='case_import_queue') @@ -64,7 +59,7 @@ def _alert_on_result(result, domain): send_abnormal_usage_alert.delay(alert) -total_bytes = datadog_gauge_task( +total_bytes = metrics_gauge_task( 'commcare.case_importer.files.total_bytes', get_case_upload_files_total_bytes, run_every=crontab(minute=0) diff --git a/corehq/apps/hqwebapp/tasks.py b/corehq/apps/hqwebapp/tasks.py index f08cb646ae0d..a030c54fe211 100644 --- a/corehq/apps/hqwebapp/tasks.py +++ b/corehq/apps/hqwebapp/tasks.py @@ -5,9 +5,10 @@ from celery.task import task, periodic_task from corehq.util.bounced_email_manager import BouncedEmailManager +from corehq.util.metrics.metrics import metrics_gauge_task from dimagi.utils.logging import notify_exception -from corehq.util.datadog.gauges import datadog_gauge_task, datadog_track_errors +from 
corehq.util.datadog.gauges import datadog_track_errors from corehq.util.log import send_HTML_email @@ -129,5 +130,5 @@ def get_maintenance_alert_active(): return 1 if MaintenanceAlert.get_latest_alert() else 0 -datadog_gauge_task('commcare.maintenance_alerts.active', get_maintenance_alert_active, +metrics_gauge_task('commcare.maintenance_alerts.active', get_maintenance_alert_active, run_every=crontab(minute=1)) diff --git a/corehq/apps/sms/tasks.py b/corehq/apps/sms/tasks.py index 968551e66eeb..aa5c58446751 100644 --- a/corehq/apps/sms/tasks.py +++ b/corehq/apps/sms/tasks.py @@ -7,6 +7,7 @@ from celery.schedules import crontab +from corehq.util.metrics.metrics import metrics_gauge_task from dimagi.utils.couch import ( CriticalSection, get_redis_client, @@ -50,7 +51,7 @@ from corehq.apps.users.models import CommCareUser, CouchUser from corehq.messaging.util import use_phone_entries from corehq.util.celery_utils import no_result_task -from corehq.util.datadog.gauges import datadog_counter, datadog_gauge_task +from corehq.util.datadog.gauges import datadog_counter from corehq.util.timezones.conversions import ServerTime MAX_TRIAL_SMS = 50 @@ -588,4 +589,4 @@ def queued_sms(): return QueuedSMS.objects.count() -datadog_gauge_task('commcare.sms.queued', queued_sms, run_every=crontab()) +metrics_gauge_task('commcare.sms.queued', queued_sms, run_every=crontab()) diff --git a/corehq/motech/repeaters/tasks.py b/corehq/motech/repeaters/tasks.py index 5382fbdca840..25cdd705fb60 100644 --- a/corehq/motech/repeaters/tasks.py +++ b/corehq/motech/repeaters/tasks.py @@ -6,6 +6,7 @@ from celery.task import periodic_task, task from celery.utils.log import get_task_logger +from corehq.util.metrics.metrics import metrics_gauge_task from dimagi.utils.couch import get_redis_lock from dimagi.utils.couch.undo import DELETED_SUFFIX @@ -25,7 +26,6 @@ from corehq.util.datadog.gauges import ( datadog_bucket_timer, datadog_counter, - datadog_gauge_task, ) from corehq.util.datadog.utils import make_buckets_from_timedeltas from corehq.util.soft_assert import soft_assert @@ -137,7 +137,7 @@ def process_repeat_record(repeat_record): logging.exception('Failed to process repeat record: {}'.format(repeat_record._id)) -repeaters_overdue = datadog_gauge_task( +repeaters_overdue = metrics_gauge_task( 'commcare.repeaters.overdue', get_overdue_repeat_record_count, run_every=crontab() # every minute diff --git a/corehq/util/datadog/gauges.py b/corehq/util/datadog/gauges.py index bb4788491192..744fa9ebc057 100644 --- a/corehq/util/datadog/gauges.py +++ b/corehq/util/datadog/gauges.py @@ -9,23 +9,6 @@ from corehq.util.timer import TimingContext -def datadog_gauge_task(name, fn, run_every, enforce_prefix='commcare'): - """ - helper for easily registering datadog gauges to run periodically - - To update a datadog gauge on a schedule based on the result of a function - just add to your app's tasks.py: - - my_calculation = datadog_gauge_task('my.datadog.metric', my_calculation_function, - run_every=crontab(minute=0)) - - """ - _enforce_prefix(name, enforce_prefix) - - datadog_gauge = _DatadogGauge(name, fn, run_every) - return datadog_gauge.periodic_task() - - def datadog_histogram(name, value, enforce_prefix='commcare', tags=None): """ Usage: Used to track the statistical distribution of a set of values over a statsd flush period. 
@@ -100,23 +83,6 @@ def new_stop(name=None): return timer -class _DatadogGauge(object): - - def __init__(self, name, fn, run_every): - self.name = name - self.fn = fn - self.run_every = run_every - - def periodic_task(self): - @periodic_task(serializer='pickle', queue='background_queue', run_every=self.run_every, - acks_late=True, ignore_result=True) - @wraps(self.fn) - def inner(*args, **kwargs): - statsd.gauge(self.name, self.fn(*args, **kwargs)) - - return inner - - def _enforce_prefix(name, prefix): soft_assert(fail_if_debug=True).call( not prefix or name.split('.')[0] == prefix, diff --git a/corehq/util/metrics/metrics.py b/corehq/util/metrics/metrics.py index 8b28203692cf..00ed7a633b98 100644 --- a/corehq/util/metrics/metrics.py +++ b/corehq/util/metrics/metrics.py @@ -1,8 +1,10 @@ import abc import re from abc import abstractmethod +from functools import wraps from typing import Iterable, List +from celery.task import periodic_task from django.utils.functional import SimpleLazyObject from corehq.util.soft_assert import soft_assert @@ -93,3 +95,26 @@ def _record_metric(*args, **kwargs): getattr(delegate, item)(*args, **kwargs) return _record_metric raise AttributeError + + +def metrics_gauge_task(name, fn, run_every): + """ + helper for easily registering gauges to run periodically + + To update a gauge on a schedule based on the result of a function + just add to your app's tasks.py: + + my_calculation = metrics_gauge_task('commcare.my.metric', my_calculation_function, + run_every=crontab(minute=0)) + + """ + _enforce_prefix(name, 'commcare') + + @periodic_task(serializer='pickle', queue='background_queue', run_every=run_every, + acks_late=True, ignore_result=True) + @wraps(fn) + def inner(*args, **kwargs): + from corehq.util.metrics import metrics_gauge + metrics_gauge(name, fn(*args, **kwargs)) + + return inner From 5897eb9574ad8baf4615eb8ff5a2062b2f7cf14f Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Tue, 17 Mar 2020 14:43:11 +0200 Subject: [PATCH 25/40] instantiate provider --- corehq/util/metrics/__init__.py | 2 +- corehq/util/metrics/urls.py | 0 2 files changed, 1 insertion(+), 1 deletion(-) create mode 100644 corehq/util/metrics/urls.py diff --git a/corehq/util/metrics/__init__.py b/corehq/util/metrics/__init__.py index 4d34b1e48822..9ee5c6dcda2d 100644 --- a/corehq/util/metrics/__init__.py +++ b/corehq/util/metrics/__init__.py @@ -14,7 +14,7 @@ def _get_metrics_provider(): if not _metrics: providers = [] for provider_path in settings.METRICS_PROVIDERS: - provider = to_function(provider_path) + provider = to_function(provider_path)() provider.initialize() providers.append(provider) diff --git a/corehq/util/metrics/urls.py b/corehq/util/metrics/urls.py new file mode 100644 index 000000000000..e69de29bb2d1 From 28a8a5bc448bbc958e9defa9b27d82ee66143702 Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Tue, 17 Mar 2020 14:43:40 +0200 Subject: [PATCH 26/40] hook up metrics view --- corehq/util/metrics/urls.py | 7 +++++++ corehq/util/metrics/views.py | 19 +++++++++++++++++++ deployment/gunicorn/gunicorn_conf.py | 13 +++++++++++++ urls.py | 3 ++- 4 files changed, 41 insertions(+), 1 deletion(-) create mode 100644 corehq/util/metrics/views.py diff --git a/corehq/util/metrics/urls.py b/corehq/util/metrics/urls.py index e69de29bb2d1..b6df3985ac4f 100644 --- a/corehq/util/metrics/urls.py +++ b/corehq/util/metrics/urls.py @@ -0,0 +1,7 @@ +from django.conf.urls import url + +from corehq.util.metrics.views import prometheus_metrics + +urlpatterns = [ + url(r"^metrics$", 
prometheus_metrics, name="prometheus-django-metrics") +] diff --git a/corehq/util/metrics/views.py b/corehq/util/metrics/views.py new file mode 100644 index 000000000000..d55d55934b7b --- /dev/null +++ b/corehq/util/metrics/views.py @@ -0,0 +1,19 @@ +import os + +import prometheus_client +from django.http import HttpResponse +from prometheus_client import multiprocess + + +def prometheus_metrics(request): + """Exports /metrics as a Django view. + """ + if "prometheus_multiproc_dir" in os.environ: + registry = prometheus_client.CollectorRegistry() + multiprocess.MultiProcessCollector(registry) + else: + registry = prometheus_client.REGISTRY + metrics_page = prometheus_client.generate_latest(registry) + return HttpResponse( + metrics_page, content_type=prometheus_client.CONTENT_TYPE_LATEST + ) diff --git a/deployment/gunicorn/gunicorn_conf.py b/deployment/gunicorn/gunicorn_conf.py index 668c9e68a008..9462855841ad 100644 --- a/deployment/gunicorn/gunicorn_conf.py +++ b/deployment/gunicorn/gunicorn_conf.py @@ -1,3 +1,6 @@ +import glob +import os + preload_app = True worker_class = 'gevent' keepalive = 60 @@ -18,3 +21,13 @@ def post_fork(server, worker): # see: https://github.com/benoitc/gunicorn/issues/527#issuecomment-19601046 from django.urls import resolve resolve('/') + +def on_starting(server): + path = os.environ.get('prometheus_multiproc_dir') + for f in glob.glob(os.path.join(path, '*.db')): + os.remove(f) + + +def child_exit(server, worker): + from prometheus_client import multiprocess + multiprocess.mark_process_dead(worker.pid) diff --git a/urls.py b/urls.py index 6d2b35fc2004..14bb326d340e 100644 --- a/urls.py +++ b/urls.py @@ -141,7 +141,8 @@ url(r'^unsubscribe_report/(?P[\w-]+)/' r'(?P[\w.%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,})/(?P[\w-]+)/', ReportNotificationUnsubscribeView.as_view(), name=ReportNotificationUnsubscribeView.urlname), - url(r'^phone/list_apps', list_apps, name="list_accessible_apps") + url(r'^phone/list_apps', list_apps, name="list_accessible_apps"), + url(r'', include('corehq.util.metrics.urls')), ] + LOCAL_APP_URLS if settings.ENABLE_PRELOGIN_SITE: From abd03555f983678b7ad47aa9f1dbba3c78588732 Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Tue, 17 Mar 2020 15:36:13 +0200 Subject: [PATCH 27/40] todo --- corehq/util/metrics/metrics.py | 1 + 1 file changed, 1 insertion(+) diff --git a/corehq/util/metrics/metrics.py b/corehq/util/metrics/metrics.py index 00ed7a633b98..093c9ffbd63e 100644 --- a/corehq/util/metrics/metrics.py +++ b/corehq/util/metrics/metrics.py @@ -115,6 +115,7 @@ def metrics_gauge_task(name, fn, run_every): @wraps(fn) def inner(*args, **kwargs): from corehq.util.metrics import metrics_gauge + # TODO: make this use prometheus push gateway metrics_gauge(name, fn(*args, **kwargs)) return inner From 75fa9793259f75075bcc2f83fe58253bd4479737 Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Tue, 17 Mar 2020 16:22:16 +0200 Subject: [PATCH 28/40] lint --- corehq/apps/receiverwrapper/views.py | 8 +++----- corehq/util/metrics/__init__.py | 11 +++++------ corehq/util/metrics/datadog.py | 1 - corehq/util/metrics/metrics.py | 4 +--- corehq/util/metrics/tests/test_metrics.py | 5 ----- deployment/gunicorn/gunicorn_conf.py | 2 ++ 6 files changed, 11 insertions(+), 20 deletions(-) diff --git a/corehq/apps/receiverwrapper/views.py b/corehq/apps/receiverwrapper/views.py index 00d00de4b8cb..6fe383b8465b 100644 --- a/corehq/apps/receiverwrapper/views.py +++ b/corehq/apps/receiverwrapper/views.py @@ -1,16 +1,11 @@ -import logging import os from django.http import 
HttpResponseBadRequest, HttpResponseForbidden from django.views.decorators.csrf import csrf_exempt from django.views.decorators.http import require_POST -from couchdbkit import ResourceNotFound -from tastypie.http import HttpTooManyRequests - import couchforms from casexml.apps.case.xform import get_case_updates, is_device_report -from corehq.util.metrics import metrics, metrics_counter, metrics_histogram from couchforms import openrosa_response from couchforms.const import MAGIC_PROPERTY, BadRequest from couchforms.getters import MultimediaBug @@ -51,7 +46,10 @@ convert_xform_to_json, should_use_sql_backend, ) +from corehq.util.metrics import metrics_counter, metrics_histogram from corehq.util.timer import TimingContext +from couchdbkit import ResourceNotFound +from tastypie.http import HttpTooManyRequests PROFILE_PROBABILITY = float(os.getenv('COMMCARE_PROFILE_SUBMISSION_PROBABILITY', 0)) PROFILE_LIMIT = os.getenv('COMMCARE_PROFILE_SUBMISSION_LIMIT') diff --git a/corehq/util/metrics/__init__.py b/corehq/util/metrics/__init__.py index 9ee5c6dcda2d..9cdb66dd182f 100644 --- a/corehq/util/metrics/__init__.py +++ b/corehq/util/metrics/__init__.py @@ -1,9 +1,7 @@ -from typing import List, Iterable +from typing import Iterable import settings -from corehq.util.metrics.datadog import DatadogMetrics from corehq.util.metrics.metrics import DummyMetrics, DelegatedMetrics, DEFAULT_BUCKETS -from corehq.util.metrics.prometheus import PrometheusMetrics from dimagi.utils.modules import to_function _metrics = None @@ -37,8 +35,9 @@ def metrics_gauge(name: str, value: float, tags: dict = None, documentation: str provider.gauge(name, value, tags, documentation) -def metrics_histogram(name: str, value: float, - bucket_tag: str, buckets: Iterable[int] = DEFAULT_BUCKETS, bucket_unit: str = '', - tags: dict = None, documentation: str = ''): +def metrics_histogram( + name: str, value: float, + bucket_tag: str, buckets: Iterable[int] = DEFAULT_BUCKETS, bucket_unit: str = '', + tags: dict = None, documentation: str = ''): provider = _get_metrics_provider() provider.histogram(name, value, bucket_tag, buckets, bucket_unit, tags, documentation) diff --git a/corehq/util/metrics/datadog.py b/corehq/util/metrics/datadog.py index 09efc8dc8524..f5bca56f24f1 100644 --- a/corehq/util/metrics/datadog.py +++ b/corehq/util/metrics/datadog.py @@ -5,7 +5,6 @@ from corehq.util.datadog.utils import bucket_value from corehq.util.metrics.metrics import HqMetrics -from datadog import api from datadog.dogstatsd.base import DogStatsd datadog_logger = logging.getLogger('datadog') diff --git a/corehq/util/metrics/metrics.py b/corehq/util/metrics/metrics.py index 093c9ffbd63e..1ddb92167c30 100644 --- a/corehq/util/metrics/metrics.py +++ b/corehq/util/metrics/metrics.py @@ -2,11 +2,9 @@ import re from abc import abstractmethod from functools import wraps -from typing import Iterable, List +from typing import List from celery.task import periodic_task -from django.utils.functional import SimpleLazyObject - from corehq.util.soft_assert import soft_assert from prometheus_client.utils import INF diff --git a/corehq/util/metrics/tests/test_metrics.py b/corehq/util/metrics/tests/test_metrics.py index 920fb52bcacb..535521bdc9b3 100644 --- a/corehq/util/metrics/tests/test_metrics.py +++ b/corehq/util/metrics/tests/test_metrics.py @@ -1,16 +1,11 @@ from typing import Dict, Tuple from django.test import SimpleTestCase -from django.utils.functional import SimpleLazyObject from corehq.util.metrics import DatadogMetrics, PrometheusMetrics -from 
corehq.util.metrics.metrics import ( - DelegatedMetrics, -) from corehq.util.metrics.tests.utils import patch_datadog from prometheus_client.samples import Sample from prometheus_client.utils import INF -from testil import eq class _TestMetrics(SimpleTestCase): diff --git a/deployment/gunicorn/gunicorn_conf.py b/deployment/gunicorn/gunicorn_conf.py index 9462855841ad..631b4d6a2fe5 100644 --- a/deployment/gunicorn/gunicorn_conf.py +++ b/deployment/gunicorn/gunicorn_conf.py @@ -22,7 +22,9 @@ def post_fork(server, worker): from django.urls import resolve resolve('/') + def on_starting(server): + """Wipe hte metrics from previous processes""" path = os.environ.get('prometheus_multiproc_dir') for f in glob.glob(os.path.join(path, '*.db')): os.remove(f) From 017ca5a57c5e6150df16d05abd0350577dc1a2a5 Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Wed, 18 Mar 2020 10:10:53 +0200 Subject: [PATCH 29/40] PR feedback --- corehq/util/metrics/__init__.py | 6 ++++++ corehq/util/metrics/datadog.py | 6 +++++- corehq/util/metrics/metrics.py | 6 +++--- 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/corehq/util/metrics/__init__.py b/corehq/util/metrics/__init__.py index 9cdb66dd182f..2bc9b73c2159 100644 --- a/corehq/util/metrics/__init__.py +++ b/corehq/util/metrics/__init__.py @@ -4,6 +4,12 @@ from corehq.util.metrics.metrics import DummyMetrics, DelegatedMetrics, DEFAULT_BUCKETS from dimagi.utils.modules import to_function +__all__ = [ + 'metrics_counter', + 'metrics_gauge', + 'metrics_histogram', +] + _metrics = None diff --git a/corehq/util/metrics/datadog.py b/corehq/util/metrics/datadog.py index f5bca56f24f1..ee30af19f650 100644 --- a/corehq/util/metrics/datadog.py +++ b/corehq/util/metrics/datadog.py @@ -17,7 +17,11 @@ class DatadogMetrics(HqMetrics): def initialize(self): if not settings.DATADOG_API_KEY or not settings.DATADOG_APP_KEY: - raise Exception("Datadog not configured. Set DATADOG_API_KEY and DATADOG_APP_KEY in settings.") + raise Exception( + "Datadog not configured." + "Set DATADOG_API_KEY and DATADOG_APP_KEY in settings or update METRICS_PROVIDERS" + "to remove the Datadog provider." 
+ ) try: from datadog import initialize diff --git a/corehq/util/metrics/metrics.py b/corehq/util/metrics/metrics.py index 1ddb92167c30..87bc96e3cf3e 100644 --- a/corehq/util/metrics/metrics.py +++ b/corehq/util/metrics/metrics.py @@ -65,11 +65,11 @@ def _counter(self, name, value, tags, documentation): @abstractmethod def _gauge(self, name, value, tags, documentation): - pass + raise NotImplementedError @abstractmethod def _histogram(self, name, value, bucket_tag, buckets, bucket_unit, tags, documentation): - pass + raise NotImplementedError class DummyMetrics: @@ -92,7 +92,7 @@ def _record_metric(*args, **kwargs): for delegate in self.delegates: getattr(delegate, item)(*args, **kwargs) return _record_metric - raise AttributeError + raise AttributeError(item) def metrics_gauge_task(name, fn, run_every): From 770f4a037b7aed45a43f2013ae45215334a34e9a Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Wed, 18 Mar 2020 10:12:03 +0200 Subject: [PATCH 30/40] log output from DebugMetrics --- corehq/util/metrics/__init__.py | 4 ++-- corehq/util/metrics/metrics.py | 12 +++++++++--- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/corehq/util/metrics/__init__.py b/corehq/util/metrics/__init__.py index 2bc9b73c2159..9d45ead614d5 100644 --- a/corehq/util/metrics/__init__.py +++ b/corehq/util/metrics/__init__.py @@ -1,7 +1,7 @@ from typing import Iterable import settings -from corehq.util.metrics.metrics import DummyMetrics, DelegatedMetrics, DEFAULT_BUCKETS +from corehq.util.metrics.metrics import DebugMetrics, DelegatedMetrics, DEFAULT_BUCKETS from dimagi.utils.modules import to_function __all__ = [ @@ -23,7 +23,7 @@ def _get_metrics_provider(): providers.append(provider) if not providers: - _metrics = DummyMetrics() + _metrics = DebugMetrics() elif len(providers) > 1: _metrics = DelegatedMetrics(providers) else: diff --git a/corehq/util/metrics/metrics.py b/corehq/util/metrics/metrics.py index 87bc96e3cf3e..70597ce99bef 100644 --- a/corehq/util/metrics/metrics.py +++ b/corehq/util/metrics/metrics.py @@ -1,4 +1,5 @@ import abc +import logging import re from abc import abstractmethod from functools import wraps @@ -14,6 +15,9 @@ RESERVED_METRIC_TAG_NAMES = ['quantile', 'le'] +logger = logging.getLogger('commcare.metrics') + + def _enforce_prefix(name, prefix): soft_assert(fail_if_debug=True).call( not prefix or name.startswith(prefix), @@ -72,14 +76,16 @@ def _histogram(self, name, value, bucket_tag, buckets, bucket_unit, tags, docume raise NotImplementedError -class DummyMetrics: +class DebugMetrics: def __getattr__(self, item): if item in ('counter', 'gauge', 'histogram'): - def _check(name, documentation, tags, *args, **kwargs): + def _check(name, value, *args, **kwargs): + tags = kwargs.get('tags', {}) _enforce_prefix(name, 'commcare') _validate_tag_names(tags) + logger.debug("[%s] %s %s %s", item, name, tags, value) return _check - raise AttributeError + raise AttributeError(item) class DelegatedMetrics: From 8bb9e03a5e476d7072ac34327fe624dcd5acdc5e Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Wed, 18 Mar 2020 10:32:28 +0200 Subject: [PATCH 31/40] move metrics view to hq/admin --- deployment/gunicorn/gunicorn_conf.py | 2 +- urls.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/deployment/gunicorn/gunicorn_conf.py b/deployment/gunicorn/gunicorn_conf.py index 631b4d6a2fe5..aa8399de7ad6 100644 --- a/deployment/gunicorn/gunicorn_conf.py +++ b/deployment/gunicorn/gunicorn_conf.py @@ -24,7 +24,7 @@ def post_fork(server, worker): def on_starting(server): - """Wipe 
hte metrics from previous processes""" + """Wipe the metrics from previous processes""" path = os.environ.get('prometheus_multiproc_dir') for f in glob.glob(os.path.join(path, '*.db')): os.remove(f) diff --git a/urls.py b/urls.py index 14bb326d340e..d0c98c0a1564 100644 --- a/urls.py +++ b/urls.py @@ -104,6 +104,7 @@ url(r'^hq/sms/', include(sms_admin_interface_urls)), url(r'^hq/multimedia/', include('corehq.apps.hqmedia.urls')), url(r'^hq/admin/', include('corehq.apps.hqadmin.urls')), + url(r'^hq/admin/', include('corehq.util.metrics.urls')), url(r'^hq/flags/', include('corehq.apps.toggle_ui.urls')), url(r'^hq/notifications/', include('corehq.apps.notifications.urls')), url(r'^unicel/', include('corehq.messaging.smsbackends.unicel.urls')), @@ -142,7 +143,6 @@ r'(?P[\w.%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,})/(?P[\w-]+)/', ReportNotificationUnsubscribeView.as_view(), name=ReportNotificationUnsubscribeView.urlname), url(r'^phone/list_apps', list_apps, name="list_accessible_apps"), - url(r'', include('corehq.util.metrics.urls')), ] + LOCAL_APP_URLS if settings.ENABLE_PRELOGIN_SITE: From 9a49d2681e15ee96f9baf18a8763199588b7e3aa Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Wed, 18 Mar 2020 10:36:00 +0200 Subject: [PATCH 32/40] move metrics_gauge_task to package init --- corehq/apps/case_importer/tasks.py | 2 +- corehq/apps/hqwebapp/tasks.py | 2 +- corehq/apps/sms/tasks.py | 2 +- corehq/motech/repeaters/tasks.py | 2 +- corehq/util/metrics/__init__.py | 29 ++++++++++++++++++++++++++++- corehq/util/metrics/metrics.py | 26 -------------------------- 6 files changed, 32 insertions(+), 31 deletions(-) diff --git a/corehq/apps/case_importer/tasks.py b/corehq/apps/case_importer/tasks.py index e0e4202a0ada..087ba77a8e30 100644 --- a/corehq/apps/case_importer/tasks.py +++ b/corehq/apps/case_importer/tasks.py @@ -10,7 +10,7 @@ from .tracking.analytics import get_case_upload_files_total_bytes from .tracking.case_upload_tracker import CaseUpload from .util import get_importer_error_message, exit_celery_with_error_message -from ...util.metrics.metrics import metrics_gauge_task +from ...util.metrics import metrics_gauge_task @task(serializer='pickle', queue='case_import_queue') diff --git a/corehq/apps/hqwebapp/tasks.py b/corehq/apps/hqwebapp/tasks.py index a030c54fe211..35fdef85381f 100644 --- a/corehq/apps/hqwebapp/tasks.py +++ b/corehq/apps/hqwebapp/tasks.py @@ -5,7 +5,7 @@ from celery.task import task, periodic_task from corehq.util.bounced_email_manager import BouncedEmailManager -from corehq.util.metrics.metrics import metrics_gauge_task +from corehq.util.metrics import metrics_gauge_task from dimagi.utils.logging import notify_exception from corehq.util.datadog.gauges import datadog_track_errors diff --git a/corehq/apps/sms/tasks.py b/corehq/apps/sms/tasks.py index aa5c58446751..254579db4273 100644 --- a/corehq/apps/sms/tasks.py +++ b/corehq/apps/sms/tasks.py @@ -7,7 +7,7 @@ from celery.schedules import crontab -from corehq.util.metrics.metrics import metrics_gauge_task +from corehq.util.metrics import metrics_gauge_task from dimagi.utils.couch import ( CriticalSection, get_redis_client, diff --git a/corehq/motech/repeaters/tasks.py b/corehq/motech/repeaters/tasks.py index 25cdd705fb60..1a6c1fc12431 100644 --- a/corehq/motech/repeaters/tasks.py +++ b/corehq/motech/repeaters/tasks.py @@ -6,7 +6,7 @@ from celery.task import periodic_task, task from celery.utils.log import get_task_logger -from corehq.util.metrics.metrics import metrics_gauge_task +from corehq.util.metrics import metrics_gauge_task from 
dimagi.utils.couch import get_redis_lock from dimagi.utils.couch.undo import DELETED_SUFFIX diff --git a/corehq/util/metrics/__init__.py b/corehq/util/metrics/__init__.py index 9d45ead614d5..17eacee76f88 100644 --- a/corehq/util/metrics/__init__.py +++ b/corehq/util/metrics/__init__.py @@ -1,7 +1,10 @@ +from functools import wraps from typing import Iterable +from celery.task import periodic_task + import settings -from corehq.util.metrics.metrics import DebugMetrics, DelegatedMetrics, DEFAULT_BUCKETS +from corehq.util.metrics.metrics import DebugMetrics, DelegatedMetrics, DEFAULT_BUCKETS, _enforce_prefix from dimagi.utils.modules import to_function __all__ = [ @@ -47,3 +50,27 @@ def metrics_histogram( tags: dict = None, documentation: str = ''): provider = _get_metrics_provider() provider.histogram(name, value, bucket_tag, buckets, bucket_unit, tags, documentation) + + +def metrics_gauge_task(name, fn, run_every): + """ + helper for easily registering gauges to run periodically + + To update a gauge on a schedule based on the result of a function + just add to your app's tasks.py: + + my_calculation = metrics_gauge_task('commcare.my.metric', my_calculation_function, + run_every=crontab(minute=0)) + + """ + _enforce_prefix(name, 'commcare') + + @periodic_task(serializer='pickle', queue='background_queue', run_every=run_every, + acks_late=True, ignore_result=True) + @wraps(fn) + def inner(*args, **kwargs): + from corehq.util.metrics import metrics_gauge + # TODO: make this use prometheus push gateway + metrics_gauge(name, fn(*args, **kwargs)) + + return inner diff --git a/corehq/util/metrics/metrics.py b/corehq/util/metrics/metrics.py index 70597ce99bef..d181927f8fb4 100644 --- a/corehq/util/metrics/metrics.py +++ b/corehq/util/metrics/metrics.py @@ -2,10 +2,8 @@ import logging import re from abc import abstractmethod -from functools import wraps from typing import List -from celery.task import periodic_task from corehq.util.soft_assert import soft_assert from prometheus_client.utils import INF @@ -99,27 +97,3 @@ def _record_metric(*args, **kwargs): getattr(delegate, item)(*args, **kwargs) return _record_metric raise AttributeError(item) - - -def metrics_gauge_task(name, fn, run_every): - """ - helper for easily registering gauges to run periodically - - To update a gauge on a schedule based on the result of a function - just add to your app's tasks.py: - - my_calculation = metrics_gauge_task('commcare.my.metric', my_calculation_function, - run_every=crontab(minute=0)) - - """ - _enforce_prefix(name, 'commcare') - - @periodic_task(serializer='pickle', queue='background_queue', run_every=run_every, - acks_late=True, ignore_result=True) - @wraps(fn) - def inner(*args, **kwargs): - from corehq.util.metrics import metrics_gauge - # TODO: make this use prometheus push gateway - metrics_gauge(name, fn(*args, **kwargs)) - - return inner From 74ce07fd5eb064c64c5b2cf0e1732d759139870d Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Wed, 18 Mar 2020 10:39:19 +0200 Subject: [PATCH 33/40] fix import --- corehq/util/metrics/tests/test_metrics.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/corehq/util/metrics/tests/test_metrics.py b/corehq/util/metrics/tests/test_metrics.py index 535521bdc9b3..50b5ad514b51 100644 --- a/corehq/util/metrics/tests/test_metrics.py +++ b/corehq/util/metrics/tests/test_metrics.py @@ -2,7 +2,8 @@ from django.test import SimpleTestCase -from corehq.util.metrics import DatadogMetrics, PrometheusMetrics +from corehq.util.metrics.datadog import 
DatadogMetrics +from corehq.util.metrics.prometheus import PrometheusMetrics from corehq.util.metrics.tests.utils import patch_datadog from prometheus_client.samples import Sample from prometheus_client.utils import INF From f1874e43522c7c8f871eb62cf128e17146425aa2 Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Wed, 18 Mar 2020 11:47:07 +0200 Subject: [PATCH 34/40] add script for running metrics endpoint --- deployment/prometheum_server.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 deployment/prometheum_server.py diff --git a/deployment/prometheum_server.py b/deployment/prometheum_server.py new file mode 100644 index 000000000000..1de11e7f2b04 --- /dev/null +++ b/deployment/prometheum_server.py @@ -0,0 +1,26 @@ +""" +Simple WSGI server that exposes Prometheus metrics. + +Environment variable `prometheus_multiproc_dir` must be set and match +the value used by Django. +""" +import os +import threading +from wsgiref.simple_server import make_server + +from prometheus_client import CollectorRegistry, make_wsgi_app, multiprocess +from prometheus_client.exposition import _SilentHandler + +multiproc_dir = os.environ.get("prometheus_multiproc_dir") +if not multiproc_dir: + raise Exception("Environment variable 'prometheus_multiproc_dir' is not set") + +print(f"Exposing metrics from '{multiproc_dir}'") + +registry = CollectorRegistry() +multiprocess.MultiProcessCollector(registry) + +app = make_wsgi_app(registry) +httpd = make_server('', 9011, app, handler_class=_SilentHandler) +t = threading.Thread(target=httpd.serve_forever) +t.start() From d4220f06d756de09565fc54775a3cc1921d28777 Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Wed, 18 Mar 2020 12:13:34 +0200 Subject: [PATCH 35/40] update docs --- corehq/util/metrics/__init__.py | 1 + corehq/util/metrics/datadog.py | 12 ++++++------ 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/corehq/util/metrics/__init__.py b/corehq/util/metrics/__init__.py index 17eacee76f88..410d53c2dad3 100644 --- a/corehq/util/metrics/__init__.py +++ b/corehq/util/metrics/__init__.py @@ -11,6 +11,7 @@ 'metrics_counter', 'metrics_gauge', 'metrics_histogram', + 'metrics_gauge_task', ] _metrics = None diff --git a/corehq/util/metrics/datadog.py b/corehq/util/metrics/datadog.py index ee30af19f650..6b766d0fa350 100644 --- a/corehq/util/metrics/datadog.py +++ b/corehq/util/metrics/datadog.py @@ -52,16 +52,16 @@ def _histogram(self, name: str, value: float, This implementation of histogram uses tagging to record the buckets. It does not use the Datadog Histogram metric type. - The metric itself will be incremented by 1 on each call to `observe`. The value - passed to `observe` will be used to create the bucket tag. + The metric itself will be incremented by 1 on each call. The value + passed to `metrics_histogram` will be used to create the bucket tag. 
For example: - h = Histogram( - 'commcare.request.duration', 'description', - bucket_tag='duration', buckets=[1,2,3], bucket_units='ms' + h = metrics_histogram( + 'commcare.request.duration', 1.4, + bucket_tag='duration', buckets=[1,2,3], bucket_units='ms', + tags=tags ) - h.observe(1.4) # resulting Datadog metric # commcare.request.duration:1|c|#duration:lt_2ms From 0457c000c5944cf12d3ca6ab55491c3ac57167c8 Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Wed, 18 Mar 2020 13:58:03 +0200 Subject: [PATCH 36/40] docs --- corehq/util/metrics/__init__.py | 108 +++++++++++++++++++++++++++++- corehq/util/metrics/datadog.py | 19 +++++- corehq/util/metrics/prometheus.py | 34 +++++++++- docs/index.rst | 1 + docs/metrics.rst | 1 + 5 files changed, 156 insertions(+), 7 deletions(-) create mode 100644 docs/metrics.rst diff --git a/corehq/util/metrics/__init__.py b/corehq/util/metrics/__init__.py index 410d53c2dad3..44855c9ba4c0 100644 --- a/corehq/util/metrics/__init__.py +++ b/corehq/util/metrics/__init__.py @@ -1,3 +1,102 @@ +""" +Metrics collection +****************** + +.. contents:: + :local: + +This package exposes functions and utilities to record metrics in CommCare. These metrics +are exported / exposed to the configured metrics providers. Supported providers are: + + * Datadog + * Prometheus + +Providers are enabled using the `METRICS_PROVIDER` setting. Multiple providers can be +enabled concurrently: + +:: + + METRICS_PROVIDERS = [ + 'corehq.util.metrics.prometheus.PrometheusMetrics', + 'corehq.util.metrics.datadog.DatadogMetrics', + ] + +If no metrics providers are configured CommCare will log all metrics to the `commcare.metrics` logger +at the DEBUG level. + +Metric tagging +============== +Metrics may be tagged by passing a dictionary of tag names and values. Tags should be used +add dimensions to a metric e.g. request type, response status. + +Tags should not originate from unbounded sources or sources with high dimensionality such as +timestamps, user IDs, request IDs etc. Ideally a tag should not have more than 10 possible values. + +Read more about tagging: + +* https://prometheus.io/docs/practices/naming/#labels +* https://docs.datadoghq.com/tagging/ + +Metric Types +============ + +Counter metric +'''''''''''''' + +A counter is a cumulative metric that represents a single monotonically increasing counter +whose value can only increase or be reset to zero on restart. For example, you can use a +counter to represent the number of requests served, tasks completed, or errors. + +Do not use a counter to expose a value that can decrease. For example, do not use a counter +for the number of currently running processes; instead use a gauge. + +:: + + metrics_counter('commcare.case_import.count', 1, tags={'domain': domain}) + + +Gauge metric +'''''''''''' + +A gauge is a metric that represents a single numerical value that can arbitrarily go up and down. + +Gauges are typically used for measured values like temperatures or current memory usage, +but also "counts" that can go up and down, like the number of concurrent requests. + +:: + + metrics_gauge('commcare.case_import.queue_length', queue_length) + +For regular reporting of a gauge metric there is the `metrics_gauge_task` function: + +.. autofunction:: corehq.util.metrics.metrics_gauge_task + +Histogram metric +'''''''''''''''' + +A histogram samples observations (usually things like request durations or response sizes) +and counts them in configurable buckets. 
+ +:: + + metrics_histogram( + 'commcare.case_import.duration', timer_duration, + bucket_tag='size', buckets=[10, 50, 200, 1000], bucket_unit='s', + tags={'domain': domain} + ) + +Histograms are recorded differently in the different providers. + +.. automethod:: corehq.util.metrics.datadog.DatadogMetrics._histogram + +.. automethod:: corehq.util.metrics.prometheus.PrometheusMetrics._histogram + + +Other Notes +=========== + +* All metrics must use the prefix 'commcare.' +""" from functools import wraps from typing import Iterable @@ -55,13 +154,16 @@ def metrics_histogram( def metrics_gauge_task(name, fn, run_every): """ - helper for easily registering gauges to run periodically + Helper for easily registering gauges to run periodically To update a gauge on a schedule based on the result of a function just add to your app's tasks.py: - my_calculation = metrics_gauge_task('commcare.my.metric', my_calculation_function, - run_every=crontab(minute=0)) + :: + + my_calculation = metrics_gauge_task( + 'commcare.my.metric', my_calculation_function, run_every=crontab(minute=0) + ) """ _enforce_prefix(name, 'commcare') diff --git a/corehq/util/metrics/datadog.py b/corehq/util/metrics/datadog.py index 6b766d0fa350..29baa01272ae 100644 --- a/corehq/util/metrics/datadog.py +++ b/corehq/util/metrics/datadog.py @@ -15,6 +15,13 @@ class DatadogMetrics(HqMetrics): + """Datadog Metrics Provider + + Settings: + * DATADOG_API_KEY + * DATADOG_APP_KEY + """ + def initialize(self): if not settings.DATADOG_API_KEY or not settings.DATADOG_APP_KEY: raise Exception( @@ -38,10 +45,13 @@ def initialize(self): pass def _counter(self, name: str, value: float, tags: dict = None, documentation: str = ''): + """Although this is submitted as a COUNT the Datadog app represents these as a RATE. 
+ See https://docs.datadoghq.com/developers/metrics/types/?tab=rate#definition""" dd_tags = _format_tags(tags) _datadog_record(statsd.increment, name, value, dd_tags) def _gauge(self, name: str, value: float, tags: dict = None, documentation: str = ''): + """See https://docs.datadoghq.com/developers/metrics/types/?tab=gauge#definition""" dd_tags = _format_tags(tags) _datadog_record(statsd.gauge, name, value, dd_tags) @@ -57,16 +67,19 @@ def _histogram(self, name: str, value: float, For example: + :: + h = metrics_histogram( 'commcare.request.duration', 1.4, bucket_tag='duration', buckets=[1,2,3], bucket_units='ms', tags=tags ) - # resulting Datadog metric - # commcare.request.duration:1|c|#duration:lt_2ms + # resulting metrics + # commcare.request.duration:1|c|#duration:lt_2ms + + For more explanation about why this implementation was chosen see: - For more details see: * https://github.com/dimagi/commcare-hq/pull/17080 * https://github.com/dimagi/commcare-hq/pull/17030#issuecomment-315794700 """ diff --git a/corehq/util/metrics/prometheus.py b/corehq/util/metrics/prometheus.py index ecb757dcc0e3..f3766870345a 100644 --- a/corehq/util/metrics/prometheus.py +++ b/corehq/util/metrics/prometheus.py @@ -8,17 +8,49 @@ class PrometheusMetrics(HqMetrics): - def __init__(self): + """Prometheus Metrics Provider""" + + def initialize(self): self._metrics = {} def _counter(self, name: str, value: float = 1, tags: dict = None, documentation: str = ''): + """See https://prometheus.io/docs/concepts/metric_types/#counter""" self._get_metric(PCounter, name, tags, documentation).inc(value) def _gauge(self, name: str, value: float, tags: dict = None, documentation: str = ''): + """See https://prometheus.io/docs/concepts/metric_types/#histogram""" self._get_metric(PGauge, name, tags, documentation).set(value) def _histogram(self, name: str, value: float, bucket_tag: str, buckets: List[int], bucket_unit: str = '', tags: dict = None, documentation: str = ''): + """ + A cumulative histogram with a base metric name of exposes multiple time series + during a scrape: + + * cumulative counters for the observation buckets, exposed as + `_bucket{le=""}` + * the total sum of all observed values, exposed as `_sum` + * the count of events that have been observed, exposed as `_count` + (identical to `_bucket{le="+Inf"}` above) + + For example + :: + + h = metrics_histogram( + 'commcare.request_duration', 1.4, + bucket_tag='duration', buckets=[1,2,3], bucket_units='ms', + tags=tags + ) + + # resulting metrics + # commcare_request_duration_bucket{...tags..., le="1.0"} 0.0 + # commcare_request_duration_bucket{...tags..., le="2.0"} 1.0 + # commcare_request_duration_bucket{...tags..., le="3.0"} 1.0 + # commcare_request_duration_bucket{...tags..., le="+Inf"} 1.0 + # commcare_request_duration_sum{...tags...} 1.4 + # commcare_request_duration_count{...tags...} 1.0 + + See https://prometheus.io/docs/concepts/metric_types/#histogram""" self._get_metric(PHistogram, name, tags, documentation, buckets=buckets).observe(value) def _get_metric(self, metric_type, name, tags, documentation, **kwargs): diff --git a/docs/index.rst b/docs/index.rst index 697abee9a1b6..47845d2f3d93 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -48,6 +48,7 @@ Welcome to CommCareHQ's documentation! openmrs js-guide/README databases + metrics Tips for documenting -------------------- diff --git a/docs/metrics.rst b/docs/metrics.rst new file mode 100644 index 000000000000..fdfaf57dc3b1 --- /dev/null +++ b/docs/metrics.rst @@ -0,0 +1 @@ +.. 
automodule:: corehq.util.metrics From 3d7991264236ede1ee3f2bf850ae7486449ebce6 Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Wed, 18 Mar 2020 16:12:21 +0200 Subject: [PATCH 37/40] do setup in __init__ --- corehq/util/metrics/__init__.py | 1 - corehq/util/metrics/datadog.py | 19 +++++++++++-------- corehq/util/metrics/prometheus.py | 2 +- 3 files changed, 12 insertions(+), 10 deletions(-) diff --git a/corehq/util/metrics/__init__.py b/corehq/util/metrics/__init__.py index 44855c9ba4c0..80315e3462c2 100644 --- a/corehq/util/metrics/__init__.py +++ b/corehq/util/metrics/__init__.py @@ -122,7 +122,6 @@ def _get_metrics_provider(): providers = [] for provider_path in settings.METRICS_PROVIDERS: provider = to_function(provider_path)() - provider.initialize() providers.append(provider) if not providers: diff --git a/corehq/util/metrics/datadog.py b/corehq/util/metrics/datadog.py index 29baa01272ae..143aedebe41c 100644 --- a/corehq/util/metrics/datadog.py +++ b/corehq/util/metrics/datadog.py @@ -22,7 +22,17 @@ class DatadogMetrics(HqMetrics): * DATADOG_APP_KEY """ - def initialize(self): + def __init__(self): + if settings.UNIT_TESTING or settings.DEBUG or 'ddtrace.contrib.django' not in settings.INSTALLED_APPS: + try: + from ddtrace import tracer + tracer.enabled = False + except ImportError: + pass + + if settings.UNIT_TESTING: + return + if not settings.DATADOG_API_KEY or not settings.DATADOG_APP_KEY: raise Exception( "Datadog not configured." @@ -37,13 +47,6 @@ def initialize(self): else: initialize(settings.DATADOG_API_KEY, settings.DATADOG_APP_KEY) - if settings.UNIT_TESTING or settings.DEBUG or 'ddtrace.contrib.django' not in settings.INSTALLED_APPS: - try: - from ddtrace import tracer - tracer.enabled = False - except ImportError: - pass - def _counter(self, name: str, value: float, tags: dict = None, documentation: str = ''): """Although this is submitted as a COUNT the Datadog app represents these as a RATE. See https://docs.datadoghq.com/developers/metrics/types/?tab=rate#definition""" diff --git a/corehq/util/metrics/prometheus.py b/corehq/util/metrics/prometheus.py index f3766870345a..3ddca3537153 100644 --- a/corehq/util/metrics/prometheus.py +++ b/corehq/util/metrics/prometheus.py @@ -10,7 +10,7 @@ class PrometheusMetrics(HqMetrics): """Prometheus Metrics Provider""" - def initialize(self): + def __init__(self): self._metrics = {} def _counter(self, name: str, value: float = 1, tags: dict = None, documentation: str = ''): From d4c3dc1b8f4f6ff443df985a1ea66c84accd9722 Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Thu, 19 Mar 2020 10:52:08 +0200 Subject: [PATCH 38/40] simplify prometheus server --- deployment/{prometheum_server.py => prometheus_server.py} | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) rename deployment/{prometheum_server.py => prometheus_server.py} (90%) diff --git a/deployment/prometheum_server.py b/deployment/prometheus_server.py similarity index 90% rename from deployment/prometheum_server.py rename to deployment/prometheus_server.py index 1de11e7f2b04..7e0a7da74e8a 100644 --- a/deployment/prometheum_server.py +++ b/deployment/prometheus_server.py @@ -5,7 +5,6 @@ the value used by Django. 
""" import os -import threading from wsgiref.simple_server import make_server from prometheus_client import CollectorRegistry, make_wsgi_app, multiprocess @@ -22,5 +21,4 @@ app = make_wsgi_app(registry) httpd = make_server('', 9011, app, handler_class=_SilentHandler) -t = threading.Thread(target=httpd.serve_forever) -t.start() +httpd.serve_forever() From 1017c8512e5ad6f2f1f6728ab9f86397d2dfcdee Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Thu, 19 Mar 2020 11:28:47 +0200 Subject: [PATCH 39/40] doc updates --- corehq/util/metrics/__init__.py | 2 +- corehq/util/metrics/prometheus.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/corehq/util/metrics/__init__.py b/corehq/util/metrics/__init__.py index 80315e3462c2..5421865d9b53 100644 --- a/corehq/util/metrics/__init__.py +++ b/corehq/util/metrics/__init__.py @@ -27,7 +27,7 @@ Metric tagging ============== Metrics may be tagged by passing a dictionary of tag names and values. Tags should be used -add dimensions to a metric e.g. request type, response status. +to add dimensions to a metric e.g. request type, response status. Tags should not originate from unbounded sources or sources with high dimensionality such as timestamps, user IDs, request IDs etc. Ideally a tag should not have more than 10 possible values. diff --git a/corehq/util/metrics/prometheus.py b/corehq/util/metrics/prometheus.py index 3ddca3537153..00d66b24711a 100644 --- a/corehq/util/metrics/prometheus.py +++ b/corehq/util/metrics/prometheus.py @@ -28,10 +28,10 @@ def _histogram(self, name: str, value: float, bucket_tag: str, buckets: List[int during a scrape: * cumulative counters for the observation buckets, exposed as - `_bucket{le=""}` - * the total sum of all observed values, exposed as `_sum` - * the count of events that have been observed, exposed as `_count` - (identical to `_bucket{le="+Inf"}` above) + `_bucket{le=""}` + * the total sum of all observed values, exposed as `_sum` + * the count of events that have been observed, exposed as `_count` + (identical to `_bucket{le="+Inf"}` above) For example :: From d18075dd8a9a72005d12a35bf4f3386fc43a8629 Mon Sep 17 00:00:00 2001 From: Simon Kelly Date: Thu, 19 Mar 2020 12:27:08 +0200 Subject: [PATCH 40/40] Apply suggestions from code review Co-Authored-By: Norman Hooper --- corehq/util/metrics/datadog.py | 4 ++-- corehq/util/metrics/tests/utils.py | 7 +++---- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/corehq/util/metrics/datadog.py b/corehq/util/metrics/datadog.py index 143aedebe41c..461c94b4048d 100644 --- a/corehq/util/metrics/datadog.py +++ b/corehq/util/metrics/datadog.py @@ -35,8 +35,8 @@ def __init__(self): if not settings.DATADOG_API_KEY or not settings.DATADOG_APP_KEY: raise Exception( - "Datadog not configured." - "Set DATADOG_API_KEY and DATADOG_APP_KEY in settings or update METRICS_PROVIDERS" + "Datadog not configured. " + "Set DATADOG_API_KEY and DATADOG_APP_KEY in settings or update METRICS_PROVIDERS " "to remove the Datadog provider." 
) diff --git a/corehq/util/metrics/tests/utils.py b/corehq/util/metrics/tests/utils.py index 3c012c0ff9d5..6c4c3edcc9ee 100644 --- a/corehq/util/metrics/tests/utils.py +++ b/corehq/util/metrics/tests/utils.py @@ -7,10 +7,9 @@ @contextmanager def patch_datadog(): def record(fn, name, value, tags=None): - key = tuple([ - name, - tuple(sorted(tuple(t.split(':')) for t in (tags or []))), - ]) + def get_tag_pairs(tags: list): + return tuple(sorted(tuple(t.split(':', 1)) for t in tags)) + key = (name, get_tag_pairs(tags or [])) stats[key].append(value) stats = defaultdict(list)
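The patches above leave callers with the module-level helpers exported from `corehq.util.metrics`, backed by whichever providers are listed in `METRICS_PROVIDERS` (or by the logging `DebugMetrics` when none are configured). The sketch below is assembled from the signatures and docstrings introduced in this series; the metric names, tag values, the sample value `12.3` and the `queued_sms_count` helper are illustrative placeholders, not code taken from the repository::

    from celery.schedules import crontab

    from corehq.util.metrics import (
        metrics_counter,
        metrics_gauge,
        metrics_gauge_task,
        metrics_histogram,
    )


    def queued_sms_count():
        # Placeholder; a real caller would query the queue length here.
        return 0


    # Counter: cumulative event count, tagged by domain
    # (metric names must start with the enforced 'commcare.' prefix).
    metrics_counter('commcare.case_import.count', 1,
                    tags={'domain': 'example-domain'})

    # Gauge: a point-in-time value that can go up or down.
    metrics_gauge('commcare.case_import.queue_length', queued_sms_count())

    # Histogram: DatadogMetrics emits a counter with a bucket tag derived
    # from the value; PrometheusMetrics records a native histogram.
    metrics_histogram(
        'commcare.case_import.duration', 12.3,
        bucket_tag='duration', buckets=[10, 50, 200, 1000], bucket_unit='s',
        tags={'domain': 'example-domain'},
    )

    # Periodic gauge: registers a celery periodic_task on background_queue
    # that records the wrapped function's return value on each run.
    queued_sms = metrics_gauge_task(
        'commcare.sms.queued', queued_sms_count, run_every=crontab()
    )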