diff --git a/.coderabbit.yaml b/.coderabbit.yaml new file mode 100644 index 0000000..62ed085 --- /dev/null +++ b/.coderabbit.yaml @@ -0,0 +1,18 @@ +# yaml-language-server: $schema=https://coderabbit.ai/integrations/schema.v2.json +language: "ko-KR" +early_access: false +reviews: + profile: "chill" + request_changes_workflow: false + high_level_summary: true + poem: true + review_status: true + collapse_walkthrough: false + auto_review: + enabled: true + drafts: false + base_branches: + - main + - test +chat: + auto_reply: true \ No newline at end of file diff --git a/.gitignore b/.gitignore index 83f7d2d..0ebdcfd 100644 --- a/.gitignore +++ b/.gitignore @@ -85,6 +85,7 @@ celerybeat-schedule # Environments .env +.env.* .venv env/ venv/ diff --git a/app/config/database.py b/app/config/database.py index 80d75cc..973d0fd 100644 --- a/app/config/database.py +++ b/app/config/database.py @@ -1,2 +1,40 @@ -# 빈 파일 - DB 연결 설정 +from typing import AsyncGenerator +from sqlalchemy.ext.asyncio import AsyncSession, async_sessionmaker, create_async_engine, AsyncEngine + +from app.config.settings import get_settings +from app.utils.logger import logger + +_settings = get_settings() + +_engine = create_async_engine( + _settings.postgres_url, + pool_pre_ping=True, + echo=False, # SQL 쿼리 로깅 +) + +_async_session_factory = async_sessionmaker( + bind=_engine, + expire_on_commit=False, + autoflush=False, +) + + +def get_async_engine() -> AsyncEngine: + return _engine + + +def get_async_session_factory() -> async_sessionmaker[AsyncSession]: + return _async_session_factory + + +async def get_async_session() -> AsyncGenerator[AsyncSession, None]: + async with _async_session_factory() as session: + try: + yield session + except Exception as e: + logger.error(f"Postgres DB 에러: {e}") + await session.rollback() + raise + finally: + await session.close() diff --git a/app/config/settings.py b/app/config/settings.py index c17f4ec..d23d74d 100644 --- a/app/config/settings.py +++ b/app/config/settings.py @@ -1,2 +1,47 @@ -# 빈 파일 - 환경 변수 설정 +from functools import lru_cache +from pydantic_settings import BaseSettings, SettingsConfigDict + + +class Settings(BaseSettings): + """ + .env 에서 환경변수 로딩 + """ + + model_config = SettingsConfigDict( + env_file=".env", + env_file_encoding="utf-8", + case_sensitive=False, + extra="ignore" + ) + + # GitHub API 설정 + github_api_base_url: str + github_api_token: str | None = None + + # Ollama 설정 + ollama_base_url: str + ollama_api_key: str + ollama_model: str + ollama_timeout_seconds: int + + # Qdrant 설정 + qdrant_base_url: str + qdrant_collection: str + qdrant_api_key: str + + # 텍스트 청크 설정 + text_chunk_max_chars: int + text_chunk_overlap_chars: int + text_chunk_hard_max_chars: int + + # 동시성 설정 + concurrency_embedding_max_concurrency: int + + # PostgreSQL 설정 + postgres_url: str + + +@lru_cache(maxsize=1) +def get_settings() -> Settings: + return Settings() diff --git a/app/db/__init__.py b/app/db/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/app/db/init_db.py b/app/db/init_db.py new file mode 100644 index 0000000..7c386ea --- /dev/null +++ b/app/db/init_db.py @@ -0,0 +1,17 @@ +from sqlalchemy.ext.asyncio import AsyncEngine + +from app.models.base import Base +from app.utils.logger import logger + + +def _import_all_models() -> None: + from app.models.github_cursor import GithubCursorEntity + + +async def create_tables_if_not_exists(engine: AsyncEngine) -> None: + _import_all_models() + + async with engine.begin() as conn: + await conn.run_sync(Base.metadata.create_all) + + logger.info("DB 테이블 생성완료") diff --git a/app/main.py b/app/main.py index 18f44bd..f9c9cff 100644 --- a/app/main.py +++ b/app/main.py @@ -1,8 +1,22 @@ +from contextlib import asynccontextmanager from typing import Dict from fastapi import FastAPI -app = FastAPI() +from app.config.database import get_async_engine +from app.db.init_db import create_tables_if_not_exists +from app.utils.logger import logger + + +@asynccontextmanager +async def lifespan(app: FastAPI): + logger.info("ChatBot 애플리케이션 시작") + await create_tables_if_not_exists(get_async_engine()) + yield + logger.info("애플리케이션 종료") + + +app = FastAPI(lifespan=lifespan) @app.get("/health") diff --git a/app/models/base.py b/app/models/base.py new file mode 100644 index 0000000..364c29f --- /dev/null +++ b/app/models/base.py @@ -0,0 +1,43 @@ +import uuid +from datetime import datetime + +from sqlalchemy import DateTime, func +from sqlalchemy.dialects.postgresql.base import UUID +from sqlalchemy.orm import DeclarativeBase, Mapped, mapped_column + + +class Base(DeclarativeBase): + """ + SQLAlchemy Declarative Base + - 모든 엔티티는 Base 상속 + """ + pass + + +class TimestampMixin: + """ + created_at, updated_at 자동 관리 Mixin + - created_at: DB 레벨 자동 설정 + - updated_at: 애플리케이션 레벨에서 명시적 관리 + """ + created_at: Mapped[datetime] = mapped_column( + DateTime(timezone=True), + nullable=False, + server_default=func.now(), # DB 레벨 - INSERT 시 자동 + ) + + updated_at: Mapped[datetime] = mapped_column( + DateTime(timezone=True), + nullable=False, + server_default=func.now(), # DB 레벨 - INSERT 시 자동 + ) + +class PrimaryKeyMixin: + """ + UUID Primary Key Mixin + """ + id: Mapped[uuid.UUID] = mapped_column( + UUID(as_uuid=True), + primary_key=True, + default=uuid.uuid4, + ) diff --git a/app/models/enums/__init__.py b/app/models/enums/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/app/models/enums/source_type.py b/app/models/enums/source_type.py new file mode 100644 index 0000000..9501338 --- /dev/null +++ b/app/models/enums/source_type.py @@ -0,0 +1,17 @@ +from enum import Enum + + +class SourceType(str, Enum): + """ + 깃허브 임베딩 대상 SourceType + - Repository: 레포 파일/문서 (README 등) + - ISSUE: 이슈 + - PULL_REQUEST: PR + - COMMIT: 커밋 + - RELEASE: 릴리즈 + """ + REPOSITORY = "REPOSITORY" + ISSUE = "ISSUE" + PULL_REQUEST = "PULL_REQUEST" + COMMIT = "COMMIT" + RELEASE = "RELEASE" diff --git a/app/models/github_cursor.py b/app/models/github_cursor.py new file mode 100644 index 0000000..990cdf2 --- /dev/null +++ b/app/models/github_cursor.py @@ -0,0 +1,20 @@ +from sqlalchemy import Enum as SqlEnum +from sqlalchemy import UniqueConstraint, Index, String +from sqlalchemy.orm import Mapped, mapped_column + +from app.models.base import Base, PrimaryKeyMixin, TimestampMixin +from app.models.enums.source_type import SourceType + + +class GithubCursorEntity(Base, PrimaryKeyMixin, TimestampMixin): + __tablename__ = "github_cursor" + __table_args__ = ( + UniqueConstraint("repository_name", "source_type", name="uq_github_cursor"), + Index("idx_github_cursor_repo_type", "repository_name", "source_type") + ) + + repository_name: Mapped[str] = mapped_column(String(200), nullable=False) + + source_type: Mapped[SourceType] = mapped_column(SqlEnum(SourceType, native_enum=False), nullable=False) + + cursor_value: Mapped[str] = mapped_column(String(500), nullable=False) diff --git a/app/models/github_issue.py b/app/models/github_issue.py deleted file mode 100644 index 4a402cb..0000000 --- a/app/models/github_issue.py +++ /dev/null @@ -1,2 +0,0 @@ -# 빈 파일 - GitHub 이슈 모델 - diff --git a/app/repositories/github_cursor_repository.py b/app/repositories/github_cursor_repository.py new file mode 100644 index 0000000..7a5e93d --- /dev/null +++ b/app/repositories/github_cursor_repository.py @@ -0,0 +1,53 @@ +import uuid +from typing import Optional + +from sqlalchemy import select, func +from sqlalchemy.dialects.postgresql import insert +from sqlalchemy.ext.asyncio import AsyncSession + +from app.models.enums.source_type import SourceType +from app.models.github_cursor import GithubCursorEntity + + +class GithubCursorRepository: + async def find_by_repository_name_and_source_type( + self, + session: AsyncSession, + repository_name: str, + source_type: SourceType, + ) -> Optional[GithubCursorEntity]: + """ + 특정 repository + source_type 커서 조회 + """ + query = select(GithubCursorEntity).where( + GithubCursorEntity.repository_name == repository_name, + GithubCursorEntity.source_type == source_type, + ) + result = await session.execute(query) + return result.scalar_one_or_none() + + async def upsert( + self, + session: AsyncSession, + repository_name: str, + source_type: SourceType, + cursor_value: str, + ) -> GithubCursorEntity: + """ + 커서 upsert (없으면 생성, 있으면 업데이트) + """ + query = insert(GithubCursorEntity).values( + id=uuid.uuid4(), + repository_name=repository_name, + source_type=source_type, + cursor_value=cursor_value, + ).on_conflict_do_update( + index_elements=["repository_name", "source_type"], + set_={ + "cursor_value": cursor_value, + "updated_at": func.now(), + }, + ).returning(GithubCursorEntity) + + result = await session.execute(query) + return result.scalar_one() diff --git a/app/utils/logger.py b/app/utils/logger.py index e4114fb..f6e39c8 100644 --- a/app/utils/logger.py +++ b/app/utils/logger.py @@ -10,61 +10,60 @@ def setup_logger( - name: str = "chatbot", - log_level: str = "INFO", - log_file: Optional[str] = None, - format_string: Optional[str] = None, + name: str = "chatbot", + log_level: str = "INFO", + log_file: Optional[str] = None, + format_string: Optional[str] = None, ) -> logging.Logger: - """ - 로거를 설정하고 반환합니다. + """ + 로거를 설정하고 반환합니다. - Args: - name: 로거 이름 (기본값: "chatbot") - log_level: 로그 레벨 (DEBUG, INFO, WARNING, ERROR, CRITICAL) - log_file: 로그 파일 경로 (None이면 파일 로깅 안 함) - format_string: 커스텀 포맷 문자열 (None이면 기본 포맷 사용) + Args: + name: 로거 이름 (기본값: "chatbot") + log_level: 로그 레벨 (DEBUG, INFO, WARNING, ERROR, CRITICAL) + log_file: 로그 파일 경로 (None이면 파일 로깅 안 함) + format_string: 커스텀 포맷 문자열 (None이면 기본 포맷 사용) - Returns: - 설정된 Logger 인스턴스 - """ - logger = logging.getLogger(name) - - # 이미 핸들러가 설정되어 있으면 기존 로거 반환 - if logger.handlers: - return logger + Returns: + 설정된 Logger 인스턴스 + """ + logger = logging.getLogger(name) - # 로그 레벨 설정 - level = getattr(logging, log_level.upper(), logging.INFO) - logger.setLevel(level) + # 이미 핸들러가 설정되어 있으면 기존 로거 반환 + if logger.handlers: + return logger - # 기본 포맷 설정 - if format_string is None: - format_string = ( - "%(asctime)s - %(name)s - %(levelname)s - " - "%(filename)s:%(lineno)d - %(message)s" - ) - - formatter = logging.Formatter(format_string, datefmt="%Y-%m-%d %H:%M:%S") + # 로그 레벨 설정 + level = getattr(logging, log_level.upper(), logging.INFO) + logger.setLevel(level) - # 콘솔 핸들러 설정 - console_handler = logging.StreamHandler(sys.stdout) - console_handler.setLevel(level) - console_handler.setFormatter(formatter) - logger.addHandler(console_handler) + # 기본 포맷 설정 + if format_string is None: + format_string = ( + "%(asctime)s - %(name)s - %(levelname)s - " + "%(filename)s:%(lineno)d - %(message)s" + ) - # 파일 핸들러 설정 (선택적) - if log_file: - log_path = Path(log_file) - log_path.parent.mkdir(parents=True, exist_ok=True) - - file_handler = logging.FileHandler(log_file, encoding="utf-8") - file_handler.setLevel(level) - file_handler.setFormatter(formatter) - logger.addHandler(file_handler) + formatter = logging.Formatter(format_string, datefmt="%Y-%m-%d %H:%M:%S") - return logger + # 콘솔 핸들러 설정 + console_handler = logging.StreamHandler(sys.stdout) + console_handler.setLevel(level) + console_handler.setFormatter(formatter) + logger.addHandler(console_handler) + + # 파일 핸들러 설정 (선택적) + if log_file: + log_path = Path(log_file) + log_path.parent.mkdir(parents=True, exist_ok=True) + + file_handler = logging.FileHandler(log_file, encoding="utf-8") + file_handler.setLevel(level) + file_handler.setFormatter(formatter) + logger.addHandler(file_handler) + + return logger # 기본 로거 인스턴스 생성 logger = setup_logger() - diff --git a/requirements.txt b/requirements.txt index 3e3d573..6e1246b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,13 +1,45 @@ +alembic==1.17.2 annotated-doc==0.0.4 annotated-types==0.7.0 anyio==4.12.0 +asyncpg==0.31.0 +certifi==2025.11.12 +cffi==2.0.0 +charset-normalizer==3.4.4 click==8.3.1 -fastapi==0.124.4 +cryptography==46.0.3 +Deprecated==1.3.1 +fastapi==0.127.0 +greenlet==3.3.0 +grpcio==1.76.0 +grpcio-tools==1.76.0 h11==0.16.0 +h2==4.3.0 +hpack==4.1.0 +httpcore==1.0.9 +httpx==0.28.1 +hyperframe==6.1.0 idna==3.11 +Mako==1.3.10 +MarkupSafe==3.0.3 +numpy==2.4.0 +portalocker==3.2.0 +protobuf==6.33.2 +pycparser==2.23 pydantic==2.12.5 +pydantic-settings==2.12.0 pydantic_core==2.41.5 +PyGithub==2.8.1 +PyJWT==2.10.1 +PyNaCl==1.6.1 +python-dotenv==1.2.1 +qdrant-client==1.16.2 +requests==2.32.5 +setuptools==80.9.0 +SQLAlchemy==2.0.45 starlette==0.50.0 typing-inspection==0.4.2 typing_extensions==4.15.0 -uvicorn==0.38.0 +urllib3==2.6.2 +uvicorn==0.40.0 +wrapt==2.0.1 diff --git a/sql/V20251222_141233__create_github_cursor_table.sql b/sql/V20251222_141233__create_github_cursor_table.sql new file mode 100644 index 0000000..4e52192 --- /dev/null +++ b/sql/V20251222_141233__create_github_cursor_table.sql @@ -0,0 +1,16 @@ +CREATE TABLE IF NOT EXISTS github_cursor ( + id UUID PRIMARY KEY, + repository_name VARCHAR(200) NOT NULL, + source_type VARCHAR(50) NOT NULL, + cursor_value VARCHAR(500) NOT NULL, + created_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + updated_at TIMESTAMPTZ NOT NULL DEFAULT NOW(), + CONSTRAINT uq_github_cursor UNIQUE (repository_name, source_type), + + CONSTRAINT ck_github_cursor_source_type + CHECK (source_type IN ('REPOSITORY', 'ISSUE', 'PULL_REQUEST', 'COMMIT', 'RELEASE')) +); + +-- 인덱스 생성 +CREATE INDEX IF NOT EXISTS idx_github_cursor_repo_type + ON github_cursor (repository_name, source_type); \ No newline at end of file