Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 40 additions & 0 deletions .github/workflows/codeql.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# CodeQL code-scanning workflow: analyzes every language in the matrix on
# pushes and pull requests targeting main, plus a weekly scheduled run.
name: "CodeQL"

on:
  push:
    branches:
      - "main"
  pull_request:
    branches:
      - "main"
  schedule:
    # Minute 34, hour 20, every Monday (GitHub evaluates cron schedules in UTC).
    - cron: '34 20 * * 1'

jobs:
  analyze:
    name: Analyze
    runs-on: ubuntu-latest
    # Minimal token scopes: read the repo and workflow metadata, write results
    # to the code-scanning (security events) API.
    permissions:
      actions: read
      contents: read
      security-events: write

    strategy:
      # Let each language finish even if another language's analysis fails.
      fail-fast: false
      matrix:
        language:
          - 'javascript-typescript'
          - 'python'

    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Initialize CodeQL
        uses: github/codeql-action/init@v3
        with:
          languages: ${{ matrix.language }}

      - name: Autobuild
        uses: github/codeql-action/autobuild@v3

      - name: Perform CodeQL Analysis
        uses: github/codeql-action/analyze@v3
        with:
          # One result category per matrix language.
          category: "/language:${{matrix.language}}"
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,8 @@ exclude = [
".uv-tools/",
"docs/",
"node_modules/",
"tests/**/test_*.py",
"tests/test_logging_security.py",
"tests/test_logging_thread_safety.py",
"tests/test_template.py",
"utils/llm/",
"common/",
Expand Down
63 changes: 63 additions & 0 deletions src/utils/logging_config.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import asyncio
import os
import re
import sys
import threading

Expand All @@ -12,6 +13,65 @@
# Module-level flag, False at import time.
# NOTE(review): presumably flipped once setup_logging() has run — confirm
# against the code that reads/writes it (not visible in this hunk).
_logging_initialized = False
# Lock object created once at import time.
# NOTE(review): presumably serializes logging initialization across threads —
# confirm against its call sites.
_logging_lock = threading.Lock()

# PII patterns for redaction (pre-compiled once at import time).
# Each entry is a (compiled_regex, placeholder) pair; the pairs are applied in
# list order, so more specific patterns are listed before general ones
# (e.g. sk-ant- before sk-).
_COMPILED_PII_PATTERNS = [
    # Email addresses
    (
        re.compile(r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b"),
        "[REDACTED_EMAIL]",
    ),
    # Anthropic API keys (sk-ant-...) - listed before the OpenAI pattern
    (re.compile(r"sk-ant-[a-zA-Z0-9-]{20,}"), "[REDACTED_API_KEY]"),
    # OpenAI API keys (sk-...)
    (re.compile(r"sk-[a-zA-Z0-9]{20,}"), "[REDACTED_API_KEY]"),
    # Stripe API keys (sk_live_*, sk_test_*, pk_live_*, pk_test_*, rk_live_*, rk_test_*)
    (re.compile(r"[spr]k_(live|test)_[a-zA-Z0-9]{20,}"), "[REDACTED_API_KEY]"),
    # Authorization Bearer tokens
    (re.compile(r"Bearer\s+[a-zA-Z0-9._\-]{20,}"), "[REDACTED_BEARER_TOKEN]"),
    # Generic project/API keys (common formats: api_key=*, apikey=*, secret-key: *)
    (
        re.compile(
            r"(?i)(api[_-]?key|project[_-]?key|secret[_-]?key)[=:\s]+['\"]?[a-zA-Z0-9_\-]{16,}['\"]?"
        ),
        "[REDACTED_KEY]",
    ),
]


def _redact_text(text: str) -> str:
    """Return *text* with every PII pattern match replaced by its placeholder."""
    for pattern, placeholder in _COMPILED_PII_PATTERNS:
        text = pattern.sub(placeholder, text)
    return text


def scrub_sensitive_data(record: dict) -> None:
    """
    Patch function to scrub sensitive data from the log record.

    Modifies ``record["message"]`` and, when its text contains sensitive data,
    ``record["exception"]`` in place.

    Args:
        record: Mapping with a ``"message"`` string and an optional
            ``"exception"`` entry that is either falsy or a
            ``(type, value, traceback)`` triple.

    Returns:
        None. The record is mutated in place.
    """
    # Scrub main message
    record["message"] = _redact_text(record["message"])

    # Scrub exception if present
    exception = record.get("exception")
    if not exception:
        return

    type_, value, tb = exception
    original_text = str(value)
    redacted_text = _redact_text(original_text)
    if redacted_text == original_text:
        # Nothing sensitive: keep the original exception object untouched.
        return

    # Re-instantiate the exception with the redacted message so the record
    # still carries an exception object of the original type where possible.
    try:
        # Most standard exceptions accept a single string argument
        new_value = type_(redacted_text)
    except Exception:
        # Fallback to a generic Exception if type instantiation fails
        new_value = Exception(redacted_text)

    # Preserve traceback and chaining metadata.
    # NOTE(review): the chained exceptions themselves are attached as-is;
    # their own messages are not scrubbed here.
    new_value.__traceback__ = tb
    new_value.__cause__ = getattr(value, "__cause__", None)
    new_value.__context__ = getattr(value, "__context__", None)
    # Assigning __cause__ (even to None) implicitly sets __suppress_context__
    # to True, which would hide an implicitly chained __context__ when the
    # traceback is rendered. Restore the original flag explicitly.
    new_value.__suppress_context__ = getattr(value, "__suppress_context__", False)

    record["exception"] = (type_, new_value, tb)


def _should_show_location(level: str) -> bool:
"""Determine if location should be shown for given log level"""
Expand Down Expand Up @@ -152,6 +212,9 @@ def setup_logging(*, debug=None, info=None, warning=None, error=None, critical=N
# Remove any existing handlers
logger.remove()

# Configure global patcher for log scrubbing
logger.configure(patcher=scrub_sensitive_data)

# Initialize session_id if not already set
if session_id.get() is None:
session_id.set(generate_id())
Expand Down
120 changes: 120 additions & 0 deletions tests/test_logging_security.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
import pytest

from src.utils.logging_config import scrub_sensitive_data
from tests.test_template import TestTemplate


class TestLoggingSecurity(TestTemplate):
    """Verify that ``scrub_sensitive_data`` masks PII and secrets in log records."""

    @pytest.fixture(autouse=True)
    def setup_shared_variables(self, setup):
        """Hook for shared attributes; nothing is needed by these tests."""

    @staticmethod
    def _scrub_record(message, exception=None):
        """Build a minimal record, run the scrubber on it, and return the record."""
        record = {"message": message, "exception": exception}
        scrub_sensitive_data(record)
        return record

    def test_email_redaction(self):
        """An email address in the message is replaced by its placeholder."""
        scrubbed = self._scrub_record("User email is test@example.com")["message"]
        assert "test@example.com" not in scrubbed
        assert "[REDACTED_EMAIL]" in scrubbed

    def test_api_key_redaction(self):
        """An OpenAI-style key in the message is replaced by its placeholder."""
        api_key = "sk-abc123def456ghi789jkl012mno345pqr678stu901"
        scrubbed = self._scrub_record(f"Using key: {api_key}")["message"]
        assert api_key not in scrubbed
        assert "[REDACTED_API_KEY]" in scrubbed

    def test_multiple_redactions(self):
        """Several sensitive items in one message are all redacted."""
        scrubbed = self._scrub_record(
            "Email test@example.com and key sk-123456789012345678901234"
        )["message"]
        assert "[REDACTED_EMAIL]" in scrubbed
        assert "[REDACTED_API_KEY]" in scrubbed
        assert "test@example.com" not in scrubbed
        assert "sk-123456789012345678901234" not in scrubbed

    def test_exception_message_redaction(self):
        """PII inside the exception value is redacted; a clean message is kept."""
        # Mirror the (type, value, traceback) triple that loguru stores
        error = ValueError("Failed for user test@example.com")
        record = self._scrub_record("An error occurred", (ValueError, error, None))

        # The message had no PII, so it must come through unchanged
        assert record["message"] == "An error occurred"

        # The exception text must be scrubbed
        redacted_text = str(record["exception"][1])
        assert "test@example.com" not in redacted_text
        assert "[REDACTED_EMAIL]" in redacted_text

    def test_exception_api_key_redaction(self):
        """An API key inside the exception value is redacted."""
        api_key = "sk-123456789012345678901234"
        error = Exception(f"Auth failed with {api_key}")
        record = self._scrub_record("Error", (Exception, error, None))

        redacted_text = str(record["exception"][1])
        assert api_key not in redacted_text
        assert "[REDACTED_API_KEY]" in redacted_text

    def test_no_sensitive_data_unchanged(self):
        """A message without sensitive data is left exactly as it was."""
        original_message = "Normal system message"
        assert self._scrub_record(original_message)["message"] == original_message

    def test_anthropic_api_key_redaction(self):
        """An Anthropic-style key in the message is redacted."""
        api_key = "sk-ant-api03-abc123def456ghi789jkl012mno345pqr678"
        scrubbed = self._scrub_record(f"Using Anthropic key: {api_key}")["message"]
        assert api_key not in scrubbed
        assert "[REDACTED_API_KEY]" in scrubbed

    def test_stripe_api_key_redaction(self):
        """Stripe-style keys with each tested prefix are redacted."""
        # Keys are assembled at runtime to avoid GitHub secret scanning
        suffix = "0" * 24
        prefixes = ["sk" + "_live_", "sk" + "_test_", "pk" + "_live_", "rk" + "_live_"]

        for key in (prefix + suffix for prefix in prefixes):
            scrubbed = self._scrub_record(f"Stripe key: {key}")["message"]
            assert key not in scrubbed
            assert "[REDACTED_API_KEY]" in scrubbed

    def test_bearer_token_redaction(self):
        """An Authorization Bearer token in the message is redacted."""
        token = "Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkw"
        scrubbed = self._scrub_record(f"Authorization: {token}")["message"]
        assert "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9" not in scrubbed
        assert "[REDACTED_BEARER_TOKEN]" in scrubbed

    def test_generic_api_key_redaction(self):
        """Generic ``api_key``-style assignments are redacted."""
        samples = [
            "api_key=abc123def456ghi789jkl012",
            "API-KEY: abc123def456ghi789jkl012",
            "apikey='abc123def456ghi789jkl012'",
            "project_key=abc123def456ghi789jkl012",
            "secret-key: abc123def456ghi789jkl012",
        ]
        for sample in samples:
            scrubbed = self._scrub_record(f"Config: {sample}")["message"]
            assert "abc123def456ghi789jkl012" not in scrubbed
            assert "[REDACTED_KEY]" in scrubbed
Loading