# PII / secret patterns for redaction, compiled once at import time.
# Patterns are applied in list order, so more specific patterns must come
# before general ones (e.g. sk-ant- before sk-).
_COMPILED_PII_PATTERNS = [
    # Email addresses
    (
        re.compile(r"\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b"),
        "[REDACTED_EMAIL]",
    ),
    # Anthropic API keys (sk-ant-...) - must be before the OpenAI pattern.
    # "_" is allowed in the key body: without it a key containing an
    # underscore is either missed or only partially redacted.
    (re.compile(r"sk-ant-[a-zA-Z0-9_-]{20,}"), "[REDACTED_API_KEY]"),
    # OpenAI API keys (sk-..., including sk-proj-... style keys whose body
    # contains "-" and "_"). The lookbehind keeps ordinary hyphenated words
    # that merely end in "sk" (disk-..., task-...) from being redacted.
    (re.compile(r"(?<![A-Za-z0-9])sk-[a-zA-Z0-9_-]{20,}"), "[REDACTED_API_KEY]"),
    # Stripe API keys (sk_live_*, sk_test_*, pk_live_*, pk_test_*, rk_live_*, rk_test_*)
    (re.compile(r"[spr]k_(live|test)_[a-zA-Z0-9]{20,}"), "[REDACTED_API_KEY]"),
    # Authorization Bearer tokens. The character set follows the RFC 6750
    # b64token grammar (ALPHA / DIGIT / "-" / "." / "_" / "~" / "+" / "/"
    # plus trailing "=" padding) so the tail of a token is not left behind.
    (re.compile(r"Bearer\s+[a-zA-Z0-9._~+/\-]{20,}=*"), "[REDACTED_BEARER_TOKEN]"),
    # Generic project/API keys (api_key=*, apikey: *, secret-key *, ...).
    # An optional quote is accepted right after the key name so the JSON
    # form  "api_key": "value"  is redacted as well.
    (
        re.compile(
            r"(?i)(api[_-]?key|project[_-]?key|secret[_-]?key)['\"]?[=:\s]+['\"]?[a-zA-Z0-9_\-]{16,}['\"]?"
        ),
        "[REDACTED_KEY]",
    ),
]


def _scrub_text(text):
    """Apply every pattern in order; return ``(scrubbed_text, was_changed)``."""
    replacements = 0
    for pattern, placeholder in _COMPILED_PII_PATTERNS:
        # subn reports the match count, so no separate search() pass is needed.
        text, count = pattern.subn(placeholder, text)
        replacements += count
    return text, replacements > 0


def scrub_sensitive_data(record):
    """
    Patch function to scrub sensitive data from the log record.

    Modifies ``record["message"]`` and ``record["exception"]`` in place.

    Args:
        record: Mapping with a string ``"message"`` entry and an optional
            ``"exception"`` entry that is either falsy or a
            ``(type, value, traceback)`` triple.

    Returns:
        None. When the exception text contains sensitive data, the exception
        value is replaced by a new instance carrying the redacted text; the
        original exception object is left untouched. When nothing matches,
        the ``"exception"`` entry is not modified at all.
    """
    # Scrub main message
    record["message"], _ = _scrub_text(record["message"])

    # Scrub exception if present
    exception = record.get("exception")
    if not exception:
        return

    type_, value, tb = exception
    value_str, redacted = _scrub_text(str(value))
    if not redacted:
        return

    # Re-instantiate the exception with the redacted text so downstream
    # formatting still sees an exception object.
    try:
        # Most standard exceptions accept a single string argument
        new_value = type_(value_str)
    except Exception:
        # Deliberate best-effort fallback: the constructor signature of an
        # arbitrary exception type is unknown, and a scrubber must not raise.
        new_value = Exception(value_str)

    # Preserve traceback and chaining metadata.
    new_value.__traceback__ = tb
    new_value.__cause__ = getattr(value, "__cause__", None)
    new_value.__context__ = getattr(value, "__context__", None)
    # Assigning __cause__ implicitly sets __suppress_context__ to True, which
    # would hide an implicit "during handling of ..." chain. Restore the
    # original flag explicitly.
    new_value.__suppress_context__ = getattr(value, "__suppress_context__", False)
    # NOTE(review): exceptions reachable through __cause__/__context__ are
    # passed through unscrubbed; their text may still contain sensitive data.

    # Keep the container type of the incoming triple. A named tuple must stay
    # one so consumers can keep using attribute access; a plain tuple stays a
    # plain tuple.
    make = getattr(type(exception), "_make", None)
    if make is not None:
        record["exception"] = make((type_, new_value, tb))
    else:
        record["exception"] = (type_, new_value, tb)
class TestLoggingSecurity(TestTemplate):
    """Checks that ``scrub_sensitive_data`` redacts PII and secrets from log records."""

    @pytest.fixture(autouse=True)
    def setup_shared_variables(self, setup):
        """Autouse fixture that requests ``setup``; it defines no shared state."""
        # Initialize shared attributes here

    @staticmethod
    def _scrubbed_message(text):
        """Scrub a record that holds only *text* and return the resulting message."""
        entry = {"message": text, "exception": None}
        scrub_sensitive_data(entry)
        return entry["message"]

    def test_email_redaction(self):
        """An email address in the message is replaced by its placeholder."""
        cleaned = self._scrubbed_message("User email is test@example.com")
        assert "test@example.com" not in cleaned
        assert "[REDACTED_EMAIL]" in cleaned

    def test_api_key_redaction(self):
        """An OpenAI-style key in the message is replaced by its placeholder."""
        api_key = "sk-abc123def456ghi789jkl012mno345pqr678stu901"
        cleaned = self._scrubbed_message(f"Using key: {api_key}")
        assert api_key not in cleaned
        assert "[REDACTED_API_KEY]" in cleaned

    def test_multiple_redactions(self):
        """Several sensitive items in one message are all redacted."""
        cleaned = self._scrubbed_message(
            "Email test@example.com and key sk-123456789012345678901234"
        )
        assert "[REDACTED_EMAIL]" in cleaned
        assert "[REDACTED_API_KEY]" in cleaned
        assert "test@example.com" not in cleaned
        assert "sk-123456789012345678901234" not in cleaned

    def test_exception_message_redaction(self):
        """PII inside the exception value is scrubbed; a clean message is kept."""
        # The exception entry mimics loguru's (type, value, traceback) triple.
        failure = ValueError("Failed for user test@example.com")
        entry = {
            "message": "An error occurred",
            "exception": (ValueError, failure, None),
        }

        scrub_sensitive_data(entry)

        # The message held no PII, so it must come back unchanged.
        assert entry["message"] == "An error occurred"

        _exc_type, scrubbed, _tb = entry["exception"]
        assert "test@example.com" not in str(scrubbed)
        assert "[REDACTED_EMAIL]" in str(scrubbed)

    def test_exception_api_key_redaction(self):
        """An API key inside the exception value is scrubbed."""
        api_key = "sk-123456789012345678901234"
        failure = Exception(f"Auth failed with {api_key}")
        entry = {"message": "Error", "exception": (Exception, failure, None)}

        scrub_sensitive_data(entry)

        _exc_type, scrubbed, _tb = entry["exception"]
        assert api_key not in str(scrubbed)
        assert "[REDACTED_API_KEY]" in str(scrubbed)

    def test_no_sensitive_data_unchanged(self):
        """A message without sensitive data passes through untouched."""
        original_message = "Normal system message"
        assert self._scrubbed_message(original_message) == original_message

    def test_anthropic_api_key_redaction(self):
        """An Anthropic-style key in the message is replaced by its placeholder."""
        api_key = "sk-ant-api03-abc123def456ghi789jkl012mno345pqr678"
        cleaned = self._scrubbed_message(f"Using Anthropic key: {api_key}")
        assert api_key not in cleaned
        assert "[REDACTED_API_KEY]" in cleaned

    def test_stripe_api_key_redaction(self):
        """Stripe-style keys with several prefixes are redacted."""
        # Keys are assembled at runtime to avoid GitHub secret scanning.
        tail = "0" * 24
        heads = ("sk" + "_live_", "sk" + "_test_", "pk" + "_live_", "rk" + "_live_")

        for head in heads:
            key = head + tail
            cleaned = self._scrubbed_message(f"Stripe key: {key}")
            assert key not in cleaned
            assert "[REDACTED_API_KEY]" in cleaned

    def test_bearer_token_redaction(self):
        """An Authorization Bearer token is replaced by its placeholder."""
        token = "Bearer eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiIxMjM0NTY3ODkw"
        cleaned = self._scrubbed_message(f"Authorization: {token}")
        assert "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9" not in cleaned
        assert "[REDACTED_BEARER_TOKEN]" in cleaned

    def test_generic_api_key_redaction(self):
        """Generic ``api_key``-like assignments are redacted in every spelling."""
        samples = (
            "api_key=abc123def456ghi789jkl012",
            "API-KEY: abc123def456ghi789jkl012",
            "apikey='abc123def456ghi789jkl012'",
            "project_key=abc123def456ghi789jkl012",
            "secret-key: abc123def456ghi789jkl012",
        )
        for sample in samples:
            cleaned = self._scrubbed_message(f"Config: {sample}")
            assert "abc123def456ghi789jkl012" not in cleaned
            assert "[REDACTED_KEY]" in cleaned