Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
136 changes: 88 additions & 48 deletions ddtrace/internal/_encoding.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ from ..constants import _ORIGIN_KEY as ORIGIN_KEY
from .constants import SPAN_LINKS_KEY
from .constants import SPAN_EVENTS_KEY
from .constants import MAX_UINT_64BITS
from .logger import get_logger
from .._trace._limits import MAX_SPAN_META_VALUE_LEN
from .._trace._limits import TRUNCATED_SPAN_ATTRIBUTE_LEN
from ..settings._agent import config as agent_config
Expand All @@ -30,6 +31,8 @@ from ..settings._agent import config as agent_config
DEF MSGPACK_ARRAY_LENGTH_PREFIX_SIZE = 5
DEF MSGPACK_STRING_TABLE_LENGTH_PREFIX_SIZE = 6

cdef object log = get_logger(__name__)


cdef extern from "Python.h":
const char* PyUnicode_AsUTF8(object o)
Expand Down Expand Up @@ -703,59 +706,79 @@ cdef class MsgpackEncoderV04(MsgpackEncoderBase):
cdef Py_ssize_t L
cdef int ret
cdef dict d
cdef list m

if PyDict_CheckExact(meta):
d = <dict> meta
L = len(d) + (dd_origin is not NULL) + (len(span_events) > 0)
if L > ITEM_LIMIT:
raise ValueError("dict is too large")
if not PyDict_CheckExact(meta):
raise TypeError("Unhandled meta type: %r" % type(meta))

ret = msgpack_pack_map(&self.pk, L)
if ret == 0:
for k, v in d.items():
ret = pack_text(&self.pk, k)
if ret != 0:
break
ret = pack_text(&self.pk, v)
if ret != 0:
break
if dd_origin is not NULL:
ret = pack_bytes(&self.pk, _ORIGIN_KEY, _ORIGIN_KEY_LEN)
if ret == 0:
ret = pack_bytes(&self.pk, dd_origin, strlen(dd_origin))
if ret != 0:
return ret
if span_events:
ret = pack_text(&self.pk, SPAN_EVENTS_KEY)
if ret == 0:
ret = pack_text(&self.pk, span_events)
return ret
d = <dict> meta

raise TypeError("Unhandled meta type: %r" % type(meta))
# Filter meta to only str/bytes values
m = []
for k, v in d.items():
if PyUnicode_Check(v) or PyBytesLike_Check(v):
m.append((k, v))
else:
log.warning("Meta key %r has non-string value %r, skipping", k, v)

L = len(m) + (dd_origin is not NULL) + (len(span_events) > 0)
if L > ITEM_LIMIT:
raise ValueError("dict is too large")

ret = msgpack_pack_map(&self.pk, L)
if ret == 0:
for k, v in m:
ret = pack_text(&self.pk, k)
if ret != 0:
break
ret = pack_text(&self.pk, v)
if ret != 0:
break
if dd_origin is not NULL:
ret = pack_bytes(&self.pk, _ORIGIN_KEY, _ORIGIN_KEY_LEN)
if ret == 0:
ret = pack_bytes(&self.pk, dd_origin, strlen(dd_origin))
if ret != 0:
return ret
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: this can be a break too. It would be nice to be consistent

if span_events:
ret = pack_text(&self.pk, SPAN_EVENTS_KEY)
if ret == 0:
ret = pack_text(&self.pk, span_events)
return ret

cdef inline int _pack_metrics(self, object metrics) except? -1:
cdef Py_ssize_t L
cdef int ret
cdef dict d
cdef list m

if PyDict_CheckExact(metrics):
d = <dict> metrics
L = len(d)
if L > ITEM_LIMIT:
raise ValueError("dict is too large")
if not PyDict_CheckExact(metrics):
raise TypeError("Unhandled metrics type: %r" % type(metrics))

ret = msgpack_pack_map(&self.pk, L)
if ret == 0:
for k, v in d.items():
ret = pack_text(&self.pk, k)
if ret != 0:
break
ret = pack_number(&self.pk, v)
if ret != 0:
break
return ret
d = <dict> metrics
m = []

# Filter metrics to only number values
for k, v in d.items():
if PyLong_Check(v) or PyFloat_Check(v):
m.append((k, v))
else:
log.warning("Metric key %r has non-numeric value %r, skipping", k, v)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: Should we include the span id here? This would help with debugging.


L = len(m)
if L > ITEM_LIMIT:
raise ValueError("dict is too large")

raise TypeError("Unhandled metrics type: %r" % type(metrics))
ret = msgpack_pack_map(&self.pk, L)
if ret == 0:
for k, v in m:
ret = pack_text(&self.pk, k)
if ret != 0:
break
ret = pack_number(&self.pk, v)
if ret != 0:
break
return ret

cdef int pack_span(self, object span, unsigned long long trace_id_64bits, void *dd_origin) except? -1:
cdef int ret
Expand Down Expand Up @@ -1035,6 +1058,7 @@ cdef class MsgpackEncoderV05(MsgpackEncoderBase):

cdef int pack_span(self, object span, unsigned long long trace_id_64bits, void *dd_origin) except? -1:
cdef int ret
cdef list meta, metrics

ret = msgpack_pack_array(&self.pk, 12)
if ret != 0:
Expand Down Expand Up @@ -1089,14 +1113,22 @@ cdef class MsgpackEncoderV05(MsgpackEncoderBase):
if span._events:
span_events = json_dumps([vars(event)() for event in span._events])

# Filter meta to only str/bytes values
meta = []
for k, v in span._meta.items():
if PyUnicode_Check(v) or PyBytesLike_Check(v):
meta.append((k, v))
else:
log.warning("Meta key %r has non-string value %r, skipping", k, v)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

same here: span id


ret = msgpack_pack_map(
&self.pk,
len(span._meta) + (dd_origin is not NULL) + (len(span_links) > 0) + (len(span_events) > 0)
len(meta) + (dd_origin is not NULL) + (len(span_links) > 0) + (len(span_events) > 0)
)
if ret != 0:
return ret
if span._meta:
for k, v in span._meta.items():
if meta:
for k, v in meta:
ret = self._pack_string(k)
if ret != 0:
return ret
Expand Down Expand Up @@ -1125,11 +1157,19 @@ cdef class MsgpackEncoderV05(MsgpackEncoderBase):
if ret != 0:
return ret

ret = msgpack_pack_map(&self.pk, len(span._metrics))
# Filter metrics to only number values
metrics = []
for k, v in span._metrics.items():
if PyLong_Check(v) or PyFloat_Check(v):
metrics.append((k, v))
else:
log.warning("Metric key %r has non-numeric value %r, skipping", k, v)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit 2: span id here would be helpful so we can better detect the source of unexpected metric types.


ret = msgpack_pack_map(&self.pk, len(metrics))
if ret != 0:
return ret
if span._metrics:
for k, v in span._metrics.items():
if metrics:
for k, v in metrics:
ret = self._pack_string(k)
if ret != 0:
return ret
Expand Down
31 changes: 20 additions & 11 deletions tests/integration/test_integration_snapshots.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
# -*- coding: utf-8 -*-
import logging
import os

import mock
Expand Down Expand Up @@ -216,42 +217,50 @@ def test_wrong_span_name_type_not_sent():
({"env": "my-env", "tag1": "some_str_1", "tag2": "some_str_2", "tag3": [1, 2, 3]}),
({"env": "test-env", b"tag1": {"wrong_type": True}, b"tag2": "some_str_2", b"tag3": "some_str_3"}),
({"env": "my-test-env", "😐": "some_str_1", b"tag2": "some_str_2", "unicode": 12345}),
({"env": set([1, 2, 3])}),
({"env": None}),
({"env": True}),
({"env": 1.0}),
],
)
@pytest.mark.parametrize("encoding", ["v0.4", "v0.5"])
def test_trace_with_wrong_meta_types_not_sent(encoding, meta, monkeypatch):
"""Wrong meta types should raise TypeErrors during encoding and fail to send to the agent."""
with override_global_config(dict(_trace_api=encoding)):
with mock.patch("ddtrace._trace.span.log") as log:
logger = logging.getLogger("ddtrace.internal._encoding")
with mock.patch.object(logger, "warning") as log_warning:
with tracer.trace("root") as root:
root._meta = meta
for _ in range(299):
with tracer.trace("child") as child:
child._meta = meta
log.exception.assert_called_once_with("error closing trace")

assert log_warning.call_count == 300
log_warning.assert_called_with("Meta key %r has non-string value %r, skipping", mock.ANY, mock.ANY)


@pytest.mark.parametrize(
"metrics",
"metrics,expected_warning_count",
[
({"num1": 12345, "num2": 53421, "num3": 1, "num4": "not-a-number"}),
({b"num1": 123.45, b"num2": [1, 2, 3], b"num3": 11.0, b"num4": 1.20}),
({"😐": "123.45", b"num2": "1", "num3": {"is_number": False}, "num4": "12345"}),
({"num1": 12345, "num2": 53421, "num3": 1, "num4": "not-a-number"}, 300),
({b"num1": 123.45, b"num2": [1, 2, 3], b"num3": 11.0, b"num4": 1.20}, 300),
({"😐": "123.45", b"num2": "1", "num3": {"is_number": False}, "num4": "12345"}, 1200),
],
)
@pytest.mark.parametrize("encoding", ["v0.4", "v0.5"])
@snapshot()
@pytest.mark.xfail
def test_trace_with_wrong_metrics_types_not_sent(encoding, metrics, monkeypatch):
def test_trace_with_wrong_metrics_types_not_sent(encoding, metrics, expected_warning_count):
"""Wrong metric types should raise TypeErrors during encoding and fail to send to the agent."""
with override_global_config(dict(_trace_api=encoding)):
with mock.patch("ddtrace._trace.span.log") as log:
logger = logging.getLogger("ddtrace.internal._encoding")
with mock.patch.object(logger, "warning") as log_warning:
with tracer.trace("root") as root:
root._metrics = metrics
for _ in range(299):
with tracer.trace("child") as child:
child._metrics = metrics
log.exception.assert_called_once_with("error closing trace")

assert log_warning.call_count == expected_warning_count
log_warning.assert_called_with("Metric key %r has non-numeric value %r, skipping", mock.ANY, mock.ANY)


@pytest.mark.subprocess()
Expand Down
43 changes: 38 additions & 5 deletions tests/tracer/test_encoders.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,8 @@
import random
import string
import threading
from typing import Any
from typing import Dict
from unittest import TestCase

from hypothesis import given
Expand Down Expand Up @@ -937,13 +939,9 @@ def _value():
{"start_ns": []},
{"duration_ns": {}},
{"span_type": 100},
{"_meta": {"num": 100}},
# Validating behavior with a context manager is a customer regression
{"_meta": {"key": _value()}},
{"_metrics": {"key": "value"}},
],
)
def test_encoding_invalid_data(data):
def test_encoding_invalid_data_raises(data):
encoder = MsgpackEncoderV04(1 << 20, 1 << 20)

span = Span(name="test")
Expand All @@ -959,6 +957,41 @@ def test_encoding_invalid_data(data):
assert (not encoded_traces) or (encoded_traces[0][0] is None)


@pytest.mark.parametrize(
"meta,metrics",
[
({"num": 100}, {}),
# Validating behavior with a context manager is a customer regression
({"key": _value()}, {}),
({}, {"key": "value"}),
],
)
def test_encoding_invalid_data_ok(meta: Dict[str, Any], metrics: Dict[str, Any]):
"""Encoding invalid meta/metrics data should not raise an exception"""
encoder = MsgpackEncoderV04(1 << 20, 1 << 20)

span = Span(name="test")
span._meta = meta # type: ignore
span._metrics = metrics # type: ignore

trace = [span]
encoder.put(trace)

encoded_payloads = encoder.encode()
assert len(encoded_payloads) == 1

# Ensure it can be decoded properly
traces = msgpack.unpackb(encoded_payloads[0][0], raw=False)
assert len(traces) == 1
assert len(traces[0]) == 1

# We didn't encode the invalid meta/metrics
for key in meta.keys():
assert key not in traces[0][0]["meta"]
for key in metrics.keys():
assert key not in traces[0][0]["metrics"]


@allencodings
def test_custom_msgpack_encode_thread_safe(encoding):
class TracingThread(threading.Thread):
Expand Down
Loading