Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Python Monitoring instrumentation - Proof of Concept #269

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions kubernetes-manifests/userservice.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,12 @@ spec:
# Valid levels are debug, info, warning, error, critical. If no valid level is set, gunicorn will default to info.
- name: LOG_LEVEL
value: "info"
- name: NAMESPACE
valueFrom:
fieldRef:
fieldPath: metadata.namespace
- name: CONTAINER_NAME
value: "userservice"
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

As explained in census-instrumentation/opencensus-python#796 (comment),
the Stackdriver exporter needs these env vars to be declared explicitly.

envFrom:
- configMapRef:
name: environment-config
Expand Down
29 changes: 28 additions & 1 deletion src/contacts/contacts.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,18 +24,44 @@
import sys

import jwt
from easy_profile import EasyProfileMiddleware
from easy_profile.reporters import Reporter
from flask import Flask, jsonify, request
import bleach
from opentelemetry import trace
from opentelemetry import metrics, trace
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In the contacts service, I tried to use OpenTelemetry to instrument the metrics reported by sqlalchemy-easy-profile. However, the Cloud Monitoring exporter doesn't fully support all metric types yet.

from opentelemetry.exporter.cloud_monitoring import CloudMonitoringMetricsExporter
from opentelemetry.exporter.cloud_trace import CloudTraceSpanExporter
from opentelemetry.exporter.cloud_trace.cloud_trace_propagator import CloudTraceFormatPropagator
from opentelemetry.ext.flask import FlaskInstrumentor
from opentelemetry.propagators import set_global_httptextformat
from opentelemetry.sdk.metrics import Counter, MeterProvider
from opentelemetry.sdk.trace import TracerProvider
from opentelemetry.sdk.trace.export import SimpleExportSpanProcessor
from sqlalchemy.exc import OperationalError, SQLAlchemyError
from db import ContactsDb

class CloudMonitoringReporter(Reporter):
    """easy_profile reporter that exports a query counter via OpenTelemetry.

    On construction it installs an SDK ``MeterProvider`` as the global meter
    provider, starts an export pipeline backed by
    ``CloudMonitoringMetricsExporter`` and creates the ``query_count``
    counter, labelled by ``endpoint``.
    """

    def __init__(self):
        # NOTE: this replaces the process-wide meter provider, so the
        # reporter is expected to be instantiated once per process.
        metrics.set_meter_provider(MeterProvider())
        exporter = CloudMonitoringMetricsExporter()
        meter = metrics.get_meter(__name__)
        metrics.get_meter_provider().start_pipeline(meter, exporter)

        self.query_count = meter.create_metric(
            name="query_count",
            description="number of queries made by each endpoint",
            unit="1",
            value_type=int,
            metric_type=Counter,
            # The trailing comma matters: ("endpoint") is just the string
            # "endpoint", whereas label_keys expects a sequence of key names.
            label_keys=("endpoint",),
            enabled=True,
        )

    def report(self, path, stats):
        """Add the reported query total to the counter for one endpoint.

        Args:
            path: value recorded under the ``endpoint`` label (presumably the
                request path handed over by the profiling middleware).
            stats: mapping of profiling results; only ``stats['total']`` is
                read here.
        """
        self.query_count.add(stats['total'], {'endpoint': path})


def create_app():
"""Flask application factory to create instances
Expand All @@ -54,6 +80,7 @@ def create_app():

# Add Flask auto-instrumentation for tracing
FlaskInstrumentor().instrument_app(app)
app.wsgi_app = EasyProfileMiddleware(app.wsgi_app, reporter=CloudMonitoringReporter())

# Disabling unused-variable for lines with route decorated functions
# as pylint thinks they are unused
Expand Down
5 changes: 4 additions & 1 deletion src/contacts/requirements.in
Original file line number Diff line number Diff line change
Expand Up @@ -3,9 +3,12 @@ pyjwt==1.7.1
cryptography==2.9
gunicorn==20.0.4
bleach==3.1.4
psycopg2==2.7.7
psycopg2-binary==2.8.5
sqlalchemy==1.3.16
sqlalchemy-easy-profile==1.1.0
opentelemetry-sdk==0.10b0
opentelemetry-exporter-cloud-monitoring==0.10b0
opentelemetry-exporter-cloud-trace==0.10b0
opentelemetry-ext-flask==0.10b0
opentelemetry-ext-sqlalchemy==0.10b.0
opentelemetry-ext-system-metrics==0.10b.0
16 changes: 11 additions & 5 deletions src/contacts/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,10 @@ chardet==3.0.4 # via requests
click==7.1.1 # via flask
cryptography==2.9 # via -r requirements.in
flask==1.1.2 # via -r requirements.in, opentelemetry-ext-flask
google-api-core[grpc]==1.21.0 # via google-cloud-core, google-cloud-trace
google-api-core[grpc]==1.21.0 # via google-cloud-core, google-cloud-monitoring, google-cloud-trace
google-auth==1.18.0 # via google-api-core
google-cloud-core==1.3.0 # via google-cloud-trace
google-cloud-monitoring==1.0.0 # via opentelemetry-exporter-cloud-monitoring
google-cloud-trace==0.23.0 # via opentelemetry-exporter-cloud-trace
googleapis-common-protos==1.52.0 # via google-api-core
grpcio==1.30.0 # via google-api-core
Expand All @@ -23,15 +24,18 @@ idna==2.10 # via requests
itsdangerous==1.1.0 # via flask
jinja2==2.11.1 # via flask
markupsafe==1.1.1 # via jinja2
opentelemetry-api==0.10b0 # via opentelemetry-exporter-cloud-trace, opentelemetry-ext-flask, opentelemetry-ext-sqlalchemy, opentelemetry-ext-wsgi, opentelemetry-instrumentation, opentelemetry-sdk
opentelemetry-api==0.10b0 # via opentelemetry-exporter-cloud-monitoring, opentelemetry-exporter-cloud-trace, opentelemetry-ext-flask, opentelemetry-ext-sqlalchemy, opentelemetry-ext-system-metrics, opentelemetry-ext-wsgi, opentelemetry-instrumentation, opentelemetry-sdk
opentelemetry-exporter-cloud-monitoring==0.10b0 # via -r requirements.in
opentelemetry-exporter-cloud-trace==0.10b0 # via -r requirements.in
opentelemetry-ext-flask==0.10b0 # via -r requirements.in
opentelemetry-ext-sqlalchemy==0.10b.0 # via -r requirements.in
opentelemetry-ext-system-metrics==0.10b.0 # via -r requirements.in
opentelemetry-ext-wsgi==0.10b0 # via opentelemetry-ext-flask
opentelemetry-instrumentation==0.10b0 # via opentelemetry-ext-flask, opentelemetry-ext-sqlalchemy, opentelemetry-ext-wsgi
opentelemetry-sdk==0.10b0 # via -r requirements.in, opentelemetry-exporter-cloud-trace
opentelemetry-sdk==0.10b0 # via -r requirements.in, opentelemetry-exporter-cloud-monitoring, opentelemetry-exporter-cloud-trace
protobuf==3.12.2 # via google-api-core, googleapis-common-protos
psycopg2==2.7.7 # via -r requirements.in
psutil==5.7.0 # via opentelemetry-ext-system-metrics
psycopg2-binary==2.8.5 # via -r requirements.in
pyasn1-modules==0.2.8 # via google-auth
pyasn1==0.4.8 # via pyasn1-modules, rsa
pycparser==2.20 # via cffi
Expand All @@ -40,7 +44,9 @@ pytz==2020.1 # via google-api-core
requests==2.24.0 # via google-api-core
rsa==4.6 # via google-auth
six==1.14.0 # via bleach, cryptography, google-api-core, google-auth, grpcio, protobuf
sqlalchemy==1.3.16 # via -r requirements.in, opentelemetry-ext-sqlalchemy
sqlalchemy-easy-profile==1.1.0 # via -r requirements.in
sqlalchemy==1.3.16 # via -r requirements.in, opentelemetry-ext-sqlalchemy, sqlalchemy-easy-profile
sqlparse==0.3.1 # via sqlalchemy-easy-profile
urllib3==1.25.9 # via requests
webencodings==0.5.1 # via bleach
werkzeug==1.0.1 # via flask
Expand Down
5 changes: 4 additions & 1 deletion src/userservice/requirements.in
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,11 @@ cryptography==2.9
gunicorn==20.0.4
bcrypt==3.1.7
bleach==3.1.4
psycopg2==2.7.7
psycopg2-binary
sqlalchemy==1.3.16
sqlalchemy-easy-profile==1.1.0
opencensus==0.7.10
opencensus-ext-stackdriver==0.7.3
opentelemetry-sdk==0.10b0
opentelemetry-exporter-cloud-trace==0.10b0
opentelemetry-ext-flask==0.10b0
Expand Down
14 changes: 10 additions & 4 deletions src/userservice/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,21 @@ chardet==3.0.4 # via requests
click==7.1.1 # via flask
cryptography==2.9 # via -r requirements.in
flask==1.1.2 # via -r requirements.in, opentelemetry-ext-flask
google-api-core[grpc]==1.21.0 # via google-cloud-core, google-cloud-trace
google-api-core[grpc]==1.21.0 # via google-cloud-core, google-cloud-monitoring, google-cloud-trace, opencensus
google-auth==1.18.0 # via google-api-core
google-cloud-core==1.3.0 # via google-cloud-trace
google-cloud-trace==0.23.0 # via opentelemetry-exporter-cloud-trace
google-cloud-monitoring==0.36.0 # via opencensus-ext-stackdriver
google-cloud-trace==0.23.0 # via opencensus-ext-stackdriver, opentelemetry-exporter-cloud-trace
googleapis-common-protos==1.52.0 # via google-api-core
grpcio==1.30.0 # via google-api-core
gunicorn==20.0.4 # via -r requirements.in
idna==2.10 # via requests
itsdangerous==1.1.0 # via flask
jinja2==2.11.1 # via flask
markupsafe==1.1.1 # via jinja2
opencensus-context==0.1.1 # via opencensus
opencensus-ext-stackdriver==0.7.3 # via -r requirements.in
opencensus==0.7.10 # via -r requirements.in, opencensus-ext-stackdriver
opentelemetry-api==0.10b0 # via opentelemetry-exporter-cloud-trace, opentelemetry-ext-flask, opentelemetry-ext-sqlalchemy, opentelemetry-ext-wsgi, opentelemetry-instrumentation, opentelemetry-sdk
opentelemetry-exporter-cloud-trace==0.10b0 # via -r requirements.in
opentelemetry-ext-flask==0.10b0 # via -r requirements.in
Expand All @@ -32,7 +36,7 @@ opentelemetry-ext-wsgi==0.10b0 # via opentelemetry-ext-flask
opentelemetry-instrumentation==0.10b0 # via opentelemetry-ext-flask, opentelemetry-ext-sqlalchemy, opentelemetry-ext-wsgi
opentelemetry-sdk==0.10b0 # via -r requirements.in, opentelemetry-exporter-cloud-trace
protobuf==3.12.2 # via google-api-core, googleapis-common-protos
psycopg2==2.7.7 # via -r requirements.in
psycopg2-binary==2.8.5 # via -r requirements.in
pyasn1-modules==0.2.8 # via google-auth
pyasn1==0.4.8 # via pyasn1-modules, rsa
pycparser==2.20 # via cffi
Expand All @@ -41,7 +45,9 @@ pytz==2020.1 # via google-api-core
requests==2.24.0 # via google-api-core
rsa==4.6 # via google-auth
six==1.14.0 # via bcrypt, bleach, cryptography, google-api-core, google-auth, grpcio, protobuf
sqlalchemy==1.3.16 # via -r requirements.in, opentelemetry-ext-sqlalchemy
sqlalchemy-easy-profile==1.1.0 # via -r requirements.in
sqlalchemy==1.3.16 # via -r requirements.in, opentelemetry-ext-sqlalchemy, sqlalchemy-easy-profile
sqlparse==0.3.1 # via sqlalchemy-easy-profile
urllib3==1.25.9 # via requests
webencodings==0.5.1 # via bleach
werkzeug==1.0.1 # via flask
Expand Down
38 changes: 37 additions & 1 deletion src/userservice/userservice.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,13 @@

import bcrypt
import jwt
from easy_profile import EasyProfileMiddleware
from easy_profile.reporters import Reporter
from flask import Flask, jsonify, request
import bleach
from opencensus.ext.stackdriver import stats_exporter
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because OpenTelemetry's Cloud Monitoring support is incomplete, I tried using OpenCensus to export the SQLAlchemy metrics in the userservice, which worked as expected.

from opencensus.stats import aggregation, measure, stats as oc_stats, view
from opencensus.tags import tag_key, tag_map, tag_value
from opentelemetry import trace
from opentelemetry.exporter.cloud_trace import CloudTraceSpanExporter
from opentelemetry.exporter.cloud_trace.cloud_trace_propagator import CloudTraceFormatPropagator
Expand All @@ -37,6 +42,34 @@
from sqlalchemy.exc import OperationalError, SQLAlchemyError
from db import UserDb

class CloudMonitoringReporter(Reporter):
    """easy_profile reporter that exports query latency via OpenCensus.

    On construction it defines the ``query_latency`` measure, registers a
    distribution view over it and registers the Stackdriver stats exporter
    with the global OpenCensus view manager.
    """

    def __init__(self):
        # Single tag key shared by the view definition and by report(), so
        # the recorded tag and the view column cannot drift apart.
        self._path_key = tag_key.TagKey("path")

        self.query_latency = measure.MeasureFloat(
            "query_latency",
            "The total latency of queries in seconds for a request",
            "s")

        self.latency_view = view.View(
            "query_latency_distribution",
            "The distribution of the query latencies",
            # A view only aggregates by the tag keys listed in its columns;
            # with an empty list the "path" tag recorded in report() is
            # dropped and every endpoint lands in the same distribution.
            [self._path_key],
            self.query_latency,
            # Latency in buckets: [>=0ms, >=1ms, >=2ms, >=4ms, >=10ms, >=20ms, >=40ms]
            aggregation.DistributionAggregation(
                [.001, .002, .004, .01, .02, .04]))

        oc_stats.stats.view_manager.register_view(self.latency_view)
        exporter = stats_exporter.new_stats_exporter()
        oc_stats.stats.view_manager.register_exporter(exporter)

    def report(self, path, stats):
        """Record one latency measurement tagged with the given path.

        Args:
            path: value stored under the ``path`` tag (presumably the request
                path handed over by the profiling middleware).
            stats: mapping of profiling results; only ``stats['duration']``
                is read here (assumed to be in seconds, matching the
                measure's unit; confirm against easy_profile).
        """
        mmap = oc_stats.stats.stats_recorder.new_measurement_map()
        mmap.measure_float_put(self.query_latency, stats['duration'])
        tmap = tag_map.TagMap()
        tmap.insert(self._path_key, tag_value.TagValue(path))
        mmap.record(tmap)


def create_app():
"""Flask application factory to create instances
Expand All @@ -56,6 +89,9 @@ def create_app():
# Add Flask auto-instrumentation for tracing
FlaskInstrumentor().instrument_app(app)

# automatically collect query stats for each request endpoint and send to the Reporter
app.wsgi_app = EasyProfileMiddleware(app.wsgi_app, reporter=CloudMonitoringReporter())

# Disabling unused-variable for lines with route decorated functions
# as pylint thinks they are unused
# pylint: disable=unused-variable
Expand Down Expand Up @@ -130,7 +166,7 @@ def create_user():
app.logger.info("Successfully created user.")

except UserWarning as warn:
app.logger.error("Error creating new user: %s", str(warn))
# app.logger.error("Error creating new user: %s", str(warn))
return str(warn), 400
except NameError as err:
app.logger.error("Error creating new user: %s", str(err))
Expand Down