Skip to content

HTTP request processor #46

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 7 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,13 @@ path = "structlog_gcp/__about__.py"
[tool.uv]
dev-dependencies = [
"build[uv]>=1.2.2",
"httpx>=0.27.2",
"mypy>=1.11.2",
"pytest>=8.3.3",
"pytest-asyncio>=0.24.0",
"pytest-cov>=5.0.0",
"pytest>=8.3.3",
"ruff>=0.6.6",
"starlette>=0.38.5",
]

[tool.pytest.ini_options]
Expand All @@ -54,7 +57,9 @@ addopts = [
"--verbose",
"--verbose",
]
testpaths = "tests"
testpaths = ["structlog_gcp", "tests"]
asyncio_default_fixture_loop_scope = "session"
asyncio_mode = "auto"

[tool.coverage.run]
branch = true
Expand Down
4 changes: 3 additions & 1 deletion structlog_gcp/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import structlog.processors
from structlog.typing import Processor

from . import error_reporting, processors
from . import error_reporting, http, processors


def build_processors(
Expand Down Expand Up @@ -57,6 +57,8 @@ def build_gcp_processors(
procs.extend(processors.setup_log_severity())
procs.extend(processors.setup_code_location())

procs.append(http.request_processor)

# Errors: log exceptions
procs.extend(error_reporting.setup_exceptions())

Expand Down
5 changes: 5 additions & 0 deletions structlog_gcp/http/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
from .base import request_processor

__all__ = [
"request_processor",
]
Empty file.
49 changes: 49 additions & 0 deletions structlog_gcp/http/adapters/starlette.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
import time
from types import TracebackType
from typing import Type

from starlette.requests import Request
from starlette.responses import Response

from ..base import HTTPRequest


class RequestAdapter:
def __init__(self, request: Request):
self.request = request
self.start_time = time.perf_counter_ns()

async def __aenter__(self) -> "RequestAdapter":
return self

async def __aexit__(
self,
exc_type: Type[BaseException] | None,
exc_value: BaseException | None,
traceback: TracebackType | None,
) -> bool | None:
return None

def adapt(self, response: Response) -> HTTPRequest:
request = self.request # alias

latency_ns = time.perf_counter_ns() - self.start_time
http_version = request.scope["http_version"]

if request.client is not None:
client_host = request.client.host
client_port = request.client.port
remote_ip = f"{client_host}:{client_port}"
else:
remote_ip = None

return HTTPRequest(
method=request.method,
url=str(request.url),
status=response.status_code,
latency_ns=latency_ns,
protocol=f"HTTP/{http_version}",
user_agent=request.headers.get("user-agent"),
referer=request.headers.get("referer"),
remote_ip=remote_ip,
)
151 changes: 151 additions & 0 deletions structlog_gcp/http/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
from dataclasses import dataclass
from typing import Any

from structlog.typing import EventDict, WrappedLogger

from ..types import CLOUD_LOGGING_KEY

__all__ = ["HTTPRequest"]


@dataclass
class HTTPRequest:
method: str | None = None
"""The request method.

Examples: ``GET``, ``HEAD``, ``PUT``, ``POST``."""

url: str | None = None
"""The scheme (``http``, ``https``), the host name, the path and the query portion of the URL that was requested.

Example: ``http://example.com/some/info?color=red``
"""

size: int | None = None
"The size of the HTTP request message in bytes, including the request headers and the request body."

status: int | None = None
"""The response code indicating the status of response.

Examples: ``200``, ``404``."""

response_size: int | None = None
"""The size of the HTTP response message sent back to the client, in bytes, including the response headers and the response body."""

latency_ns: int | None = None
"""The request processing latency (in nanoseconds) on the server, from the time the request was received until the response was sent.

For WebSocket connections, this field refers to the entire time duration of the connection."""

protocol: str | None = None
"""Protocol used for the request.

Examples: ``HTTP/1.1``, ``HTTP/2``."""

referer: str | None = None
"The referer URL of the request, as defined in `HTTP/1.1 Header Field Definitions <https://datatracker.ietf.org/doc/html/rfc2616#section-14.36>`_."

user_agent: str | None = None
"""The user agent sent by the client.

Example: ``Mozilla/4.0 (compatible; MSIE 6.0; Windows 98; Q312461; .NET CLR 1.0.3705)``."""

remote_ip: str | None = None
"""The IP address (IPv4 or IPv6) of the client that issued the HTTP request. This field can include port information.

Examples: ``192.168.1.1``, ``10.0.0.1:80``, ``FE80::0202:B3FF:FE1E:8329``."""

server_ip: str | None = None
"""The IP address (IPv4 or IPv6) of the origin server that the request was sent to. This field can include port information.

Examples: ``192.168.1.1``, ``10.0.0.1:80``, ``FE80::0202:B3FF:FE1E:8329``."""

cache_lookup: bool | None = None
"Whether or not a cache lookup was attempted."

cache_hit: bool | None = None
"Whether or not an entity was served from cache (with or without validation)."

cache_validated_with_origin_server: bool | None = None
"""Whether or not the response was validated with the origin server before being served from cache.

This field is only meaningful if ``cache_hit`` is ``True``."""

cache_fill: int | None = None
"The number of HTTP response bytes inserted into cache. Set only when a cache fill was attempted."

def format(self) -> dict[str, Any]:
"""Format as https://cloud.google.com/logging/docs/reference/v2/rest/v2/LogEntry#httprequest"""

values = [
("requestMethod", self.method),
("requestUrl", self.url),
("requestSize", str_or_none(self.size)),
("status", self.status),
("responseSize", str_or_none(self.response_size)),
("userAgent", self.user_agent),
("remoteIp", self.remote_ip),
("serverIp", self.server_ip),
("referer", self.referer),
("latency", seconds_or_none(self.latency_ns)),
("cacheLookup", self.cache_lookup),
("cacheHit", self.cache_hit),
("cacheValidatedWithOriginServer", self.cache_validated_with_origin_server),
("cacheFillBytes", str_or_none(self.cache_fill)),
("protocol", self.protocol),
]

return {k: v for (k, v) in values if v is not None}


def request_processor(
logger: WrappedLogger, method_name: str, event_dict: EventDict
) -> EventDict:
if "http_request" not in event_dict:
return event_dict

request: HTTPRequest = event_dict.pop("http_request")

event_dict[CLOUD_LOGGING_KEY]["httpRequest"] = request.format()
return event_dict


def str_or_none(value: int | None) -> str | None:
"""Convert a value to string, or ``None``.

>>> str_or_none(None)
>>> str_or_none(15)
'15'
"""

if value is None:
return None

return str(value)


def seconds_or_none(value_ns: int | None) -> str | None:
"""Convert a value in nanosecond to a duration in second, or ``None``.

>>> ns = 1
>>> us = ns * 1000
>>> ms = us * 1000
>>> s = ms * 1000
>>> seconds_or_none(None)
>>> seconds_or_none(1 * ns)
'0.000000001s'
>>> seconds_or_none(15 * ms)
'0.015s'
>>> seconds_or_none(1 * s)
'1s'
>>> seconds_or_none(99.5 * s)
'99.5s'
"""

if value_ns is None:
return None

seconds = value_ns / 1e9
# Remove trailing 0 and . to keep number cleaner
seconds_str = f"{seconds:.9f}".rstrip("0.")
return f"{seconds_str}s"
5 changes: 4 additions & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,8 +40,11 @@ def logger(mock_logger_env: None) -> Generator[WrappedLogger, None, None]:
structlog.reset_defaults()


T_stdout = Callable[[], str]


@pytest.fixture
def stdout(capsys: CaptureFixture[str]) -> Callable[[], str]:
def stdout(capsys: CaptureFixture[str]) -> T_stdout:
def read() -> str:
output = capsys.readouterr()
assert "" == output.err
Expand Down
102 changes: 102 additions & 0 deletions tests/test_adapter_starlette.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
import json
from unittest.mock import patch

import structlog
from starlette.applications import Starlette
from starlette.middleware import Middleware
from starlette.middleware.base import BaseHTTPMiddleware, RequestResponseEndpoint
from starlette.requests import Request
from starlette.responses import PlainTextResponse, Response
from starlette.routing import Route
from starlette.testclient import TestClient
from starlette.types import ASGIApp
from structlog.typing import WrappedLogger

from structlog_gcp.http.adapters.starlette import RequestAdapter

from .conftest import T_stdout


class LoggerMiddleware(BaseHTTPMiddleware):
def __init__(self, app: ASGIApp) -> None:
super().__init__(app)
self.logger = structlog.get_logger()

async def dispatch(
self, request: Request, call_next: RequestResponseEndpoint
) -> Response:
mocked_scope = {}
if "no-client" in str(request.url):
# Remove the client information to test the adapter works correctly
# when the client is not set.
mocked_scope = {"client": None}

async with RequestAdapter(request) as adapter:
with patch.dict(request.scope, mocked_scope):
response = await call_next(request)
r = adapter.adapt(response)

# We don't know how much time the call will take, and we can't assert
# the latency value later on, so we fake it here to a static value.
assert r.latency_ns is not None
assert r.latency_ns > 0
r.latency_ns = 42 * 1_000_000

self.logger.info(
f"{r.method} {r.url} {r.protocol} -> {r.status}",
http_request=r,
)

return response


def index(request: Request) -> PlainTextResponse:
return PlainTextResponse("ok")


def create_app() -> Starlette:
middlewares = [Middleware(LoggerMiddleware)]
routes = [Route("/", index)]
return Starlette(routes=routes, middleware=middlewares)


async def test_basic(stdout: T_stdout, logger: WrappedLogger) -> None:
app = create_app()
client = TestClient(app)

response = client.get("/?foo=bar")
assert response.status_code == 200

expected = {
"logging.googleapis.com/sourceLocation": {
"file": "/app/test.py",
"function": "test:test123",
"line": "42",
},
"httpRequest": {
"latency": "0.042s",
"protocol": "HTTP/1.1",
"remoteIp": "testclient:50000",
"requestMethod": "GET",
"requestUrl": "http://testserver/?foo=bar",
"status": 200,
"userAgent": "testclient",
},
"message": "GET http://testserver/?foo=bar HTTP/1.1 -> 200",
"severity": "INFO",
"time": "2023-04-01T08:00:00.000000Z",
}

msg = json.loads(stdout())
assert msg == expected


async def test_no_remote_ip(stdout: T_stdout, logger: WrappedLogger) -> None:
app = create_app()
client = TestClient(app)

response = client.get("/?no-client")
assert response.status_code == 200

msg = json.loads(stdout())
assert "remoteIp" not in msg["httpRequest"]
Loading