Skip to content

Fix issue where deadlock can occur over logging._lock #4636

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 11 commits into
base: main
Choose a base branch
from
3 changes: 3 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,9 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## Unreleased

- Overwrite logging.config.fileConfig and logging.config.dictConfig to ensure
the OTLP `LogHandler` remains attached to the root logger. Fix a bug that
can cause a deadlock to occur over `logging._lock` in some cases ([#4636](https://github.com/open-telemetry/opentelemetry-python/pull/4636)).
- Update OTLP proto to v1.7 [#4645](https://github.com/open-telemetry/opentelemetry-python/pull/4645).
- Add `event_name` as a top level field in the `LogRecord`. Events are now simply logs with the
`event_name` field set, the logs SDK should be used to emit events ([#4652](https://github.com/open-telemetry/opentelemetry-python/pull/4652)).
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
from __future__ import annotations

import logging
import logging.config
import os
from abc import ABC, abstractmethod
from os import environ
Expand Down Expand Up @@ -253,31 +254,32 @@ def _init_logging(
set_event_logger_provider(event_logger_provider)

if setup_logging_handler:
_patch_basic_config()

# Add OTel handler
handler = LoggingHandler(
level=logging.NOTSET, logger_provider=provider
)
logging.getLogger().addHandler(handler)
_overwrite_logging_config_fns(handler)


def _patch_basic_config():
original_basic_config = logging.basicConfig
def _overwrite_logging_config_fns(handler):
root = logging.getLogger()

def patched_basic_config(*args, **kwargs):
root = logging.getLogger()
has_only_otel = len(root.handlers) == 1 and isinstance(
root.handlers[0], LoggingHandler
)
if has_only_otel:
otel_handler = root.handlers.pop()
original_basic_config(*args, **kwargs)
root.addHandler(otel_handler)
else:
original_basic_config(*args, **kwargs)
def wrapper(config_fn):
def overwritten_config_fn(*args, **kwargs):
# This is needed for basicConfig only. basicConfig when called by
# the user's program will be a no-op if the root handler was configured.
if len(root.handlers) == 1 and root.handlers[0] == handler:
root.handlers.pop()
config_fn(*args, **kwargs)
if handler not in root.handlers:
root.addHandler(handler)

return overwritten_config_fn

logging.basicConfig = patched_basic_config
logging.config.fileConfig = wrapper(logging.config.fileConfig)
logging.config.dictConfig = wrapper(logging.config.dictConfig)
logging.basicConfig = wrapper(logging.basicConfig)


def _import_exporters(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -551,6 +551,7 @@ def __init__(
) -> None:
super().__init__(level=level)
self._logger_provider = logger_provider or get_logger_provider()
# self.flushOnClose = False

@staticmethod
def _get_attributes(record: logging.LogRecord) -> _ExtendedAttributes:
Expand Down Expand Up @@ -643,7 +644,8 @@ def flush(self) -> None:
if hasattr(self._logger_provider, "force_flush") and callable(
self._logger_provider.force_flush
):
self._logger_provider.force_flush()
thread = threading.Thread(target=self._logger_provider.force_flush)
thread.start()


class Logger(APILogger):
Expand Down
27 changes: 27 additions & 0 deletions opentelemetry-sdk/tests/logs/test_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,33 @@ def test_handler_custom_log_level(self):
logger.critical("No Time For Caution")
self.assertEqual(processor.emit_count(), 2)

def test_handler_calling_flush_does_not_cause_deadlock(self): # pylint: disable=no-self-use
class LogProcessorThatAccessesLockOnFlush(LogRecordProcessor):
def on_emit(self, log_data: LogData):
pass

def shutdown(self):
pass

def force_flush(self, timeout_millis: int = 30000):
# Deadlock will happen here IF `flush` starts a new thread
# and then blocks, if it just starts a thread and then returns
# we don't seem to encounter the issue..
with logging._lock: # pylint: disable=protected-access
pass

logger_provider = LoggerProvider()
processor = LogProcessorThatAccessesLockOnFlush()
logger_provider.add_log_record_processor(processor)
handler = LoggingHandler(logger_provider=logger_provider)
logging.getLogger().addHandler(handler)
# The below code is essentially recreating what is causing the problem inside
# logging.config.dictConfig. Actually calling logging.config.dictConfig will modify
# global state inside the logging module and break lots of tests.
with logging._lock: # pylint: disable=protected-access
for handler in logging.getLogger().handlers:
handler.flush()

# pylint: disable=protected-access
def test_log_record_emit_noop(self):
noop_logger_provder = NoOpLoggerProvider()
Expand Down
145 changes: 104 additions & 41 deletions opentelemetry-sdk/tests/test_configurator.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from __future__ import annotations

import logging
import logging.config
from logging import WARNING, getLogger
from os import environ
from typing import Iterable, Optional, Sequence
Expand Down Expand Up @@ -624,48 +625,50 @@ def tearDown(self):
]

def test_logging_init_empty(self):
auto_resource = Resource.create(
{
"telemetry.auto.version": "auto-version",
}
)
_init_logging({}, resource=auto_resource)
self.assertEqual(self.set_provider_mock.call_count, 1)
provider = self.set_provider_mock.call_args[0][0]
self.assertIsInstance(provider, DummyLoggerProvider)
self.assertIsInstance(provider.resource, Resource)
self.assertEqual(
provider.resource.attributes.get("telemetry.auto.version"),
"auto-version",
)
self.event_logger_provider_mock.assert_called_once_with(
logger_provider=provider
)
self.set_event_logger_provider_mock.assert_called_once_with(
self.event_logger_provider_instance_mock
)
with ResetGlobalLoggingState():
auto_resource = Resource.create(
{
"telemetry.auto.version": "auto-version",
}
)
_init_logging({}, resource=auto_resource)
self.assertEqual(self.set_provider_mock.call_count, 1)
provider = self.set_provider_mock.call_args[0][0]
self.assertIsInstance(provider, DummyLoggerProvider)
self.assertIsInstance(provider.resource, Resource)
self.assertEqual(
provider.resource.attributes.get("telemetry.auto.version"),
"auto-version",
)
self.event_logger_provider_mock.assert_called_once_with(
logger_provider=provider
)
self.set_event_logger_provider_mock.assert_called_once_with(
self.event_logger_provider_instance_mock
)

@patch.dict(
environ,
{"OTEL_RESOURCE_ATTRIBUTES": "service.name=otlp-service"},
)
def test_logging_init_exporter(self):
resource = Resource.create({})
_init_logging({"otlp": DummyOTLPLogExporter}, resource=resource)
self.assertEqual(self.set_provider_mock.call_count, 1)
provider = self.set_provider_mock.call_args[0][0]
self.assertIsInstance(provider, DummyLoggerProvider)
self.assertIsInstance(provider.resource, Resource)
self.assertEqual(
provider.resource.attributes.get("service.name"),
"otlp-service",
)
self.assertIsInstance(provider.processor, DummyLogRecordProcessor)
self.assertIsInstance(
provider.processor.exporter, DummyOTLPLogExporter
)
getLogger(__name__).error("hello")
self.assertTrue(provider.processor.exporter.export_called)
with ResetGlobalLoggingState():
resource = Resource.create({})
_init_logging({"otlp": DummyOTLPLogExporter}, resource=resource)
self.assertEqual(self.set_provider_mock.call_count, 1)
provider = self.set_provider_mock.call_args[0][0]
self.assertIsInstance(provider, DummyLoggerProvider)
self.assertIsInstance(provider.resource, Resource)
self.assertEqual(
provider.resource.attributes.get("service.name"),
"otlp-service",
)
self.assertIsInstance(provider.processor, DummyLogRecordProcessor)
self.assertIsInstance(
provider.processor.exporter, DummyOTLPLogExporter
)
getLogger(__name__).error("hello")
self.assertTrue(provider.processor.exporter.export_called)

@patch.dict(
environ,
Expand Down Expand Up @@ -844,7 +847,7 @@ def test_initialize_components_kwargs(
)

def test_basicConfig_works_with_otel_handler(self):
with ClearLoggingHandlers():
with ResetGlobalLoggingState():
_init_logging(
{"otlp": DummyOTLPLogExporter},
Resource.create({}),
Expand All @@ -866,7 +869,7 @@ def test_basicConfig_works_with_otel_handler(self):
)

def test_basicConfig_preserves_otel_handler(self):
with ClearLoggingHandlers():
with ResetGlobalLoggingState():
_init_logging(
{"otlp": DummyOTLPLogExporter},
Resource.create({}),
Expand All @@ -881,7 +884,6 @@ def test_basicConfig_preserves_otel_handler(self):
)
handler = root_logger.handlers[0]
self.assertIsInstance(handler, LoggingHandler)

logging.basicConfig()

self.assertGreater(len(root_logger.handlers), 1)
Expand All @@ -897,6 +899,49 @@ def test_basicConfig_preserves_otel_handler(self):
"Should still have exactly one OpenTelemetry LoggingHandler",
)

def test_dictConfig_preserves_otel_handler(self):
with ResetGlobalLoggingState():
_init_logging(
{"otlp": DummyOTLPLogExporter},
Resource.create({}),
setup_logging_handler=True,
)

root = logging.getLogger()
self.assertEqual(
len(root.handlers),
1,
"Should be exactly one OpenTelemetry LoggingHandler",
)
logging.config.dictConfig(
{
"version": 1,
"disable_existing_loggers": False, # If this is True all loggers are disabled. Many unit tests assert loggers emit logs.
"handlers": {
"console": {
"class": "logging.StreamHandler",
"level": "DEBUG",
"stream": "ext://sys.stdout",
},
},
"loggers": {
"": { # root logger
"handlers": ["console"],
},
},
}
)
self.assertEqual(len(root.handlers), 2)

logging_handlers = [
h for h in root.handlers if isinstance(h, LoggingHandler)
]
self.assertEqual(
len(logging_handlers),
1,
"Should still have exactly one OpenTelemetry LoggingHandler",
)


class TestMetricsInit(TestCase):
def setUp(self):
Expand Down Expand Up @@ -1134,8 +1179,11 @@ def test_custom_configurator(self, mock_init_comp):
mock_init_comp.assert_called_once_with(**kwargs)


class ClearLoggingHandlers:
class ResetGlobalLoggingState:
def __init__(self):
self.original_basic_config = logging.basicConfig
self.original_dict_config = logging.config.dictConfig
self.original_file_config = logging.config.fileConfig
self.root_logger = getLogger()
self.original_handlers = None

Expand All @@ -1148,6 +1196,9 @@ def __exit__(self, exc_type, exc_val, exc_tb):
self.root_logger.handlers = []
for handler in self.original_handlers:
self.root_logger.addHandler(handler)
logging.basicConfig = self.original_basic_config
logging.config.dictConfig = self.original_dict_config
logging.config.fileConfig = self.original_file_config


class TestClearLoggingHandlers(TestCase):
Expand All @@ -1159,7 +1210,7 @@ def test_preserves_handlers(self):
root_logger.addHandler(test_handler)
expected_handlers = initial_handlers + [test_handler]

with ClearLoggingHandlers():
with ResetGlobalLoggingState():
self.assertEqual(len(root_logger.handlers), 0)
temp_handler = logging.StreamHandler()
root_logger.addHandler(temp_handler)
Expand All @@ -1169,3 +1220,15 @@ def test_preserves_handlers(self):
self.assertIs(h1, h2)

root_logger.removeHandler(test_handler)

def test_preserves_original_logging_fns(self):
def f(x):
print("f")

with ResetGlobalLoggingState():
logging.basicConfig = lambda x: print("lambda")
logging.config.dictConfig = f
logging.config.fileConfig = f
self.assertEqual(logging.config.dictConfig.__name__, "dictConfig")
self.assertEqual(logging.basicConfig.__name__, "basicConfig")
self.assertEqual(logging.config.fileConfig.__name__, "fileConfig")
Loading