diff --git a/topicer_api/client/client.py b/topicer_api/client/client.py index 4e53651..8721d96 100644 --- a/topicer_api/client/client.py +++ b/topicer_api/client/client.py @@ -149,7 +149,9 @@ def process_response(response: dict, args) -> None: print(f"Tag: {tag_span_proposal.tag.name}") print(f"Span: ({tag_span_proposal.span_start}, {tag_span_proposal.span_end})") print(f"Proposed text: '{result.text[tag_span_proposal.span_start:tag_span_proposal.span_end]}'") - print(f"Confidence: {tag_span_proposal.confidence:.4f}") + + if tag_span_proposal.confidence is not None: + print(f"Confidence: {tag_span_proposal.confidence:.4f}") if i < len(results) - 1 or j < len(result.tag_span_proposals) - 1: print("-----") diff --git a/topicer_api/config.py b/topicer_api/config.py index d9644f5..927254d 100644 --- a/topicer_api/config.py +++ b/topicer_api/config.py @@ -11,6 +11,25 @@ def __init__(self): self.TOPICER_API_CONFIGS_DIR = os.getenv("TOPICER_API_CONFIGS_DIR", "./configs") self.TOPICER_API_CONFIGS_EXTENSION = os.getenv("TOPICER_API_CONFIGS_EXTENSION", ".yaml") + self.LOGGING_CONFIG = { + "version": 1, + "formatters": { + "default": { + "format": "%(levelname)s|%(asctime)s|%(filename)s:%(name)s: %(message)s", + }, + }, + "handlers": { + "console": { + "class": "logging.StreamHandler", + "formatter": "default", + }, + }, + "root": { + "level": "INFO", + "handlers": ["console"], + }, + } + @staticmethod def _env_bool(key: str, default: bool = False) -> bool: val = os.getenv(key) diff --git a/topicer_api/main.py b/topicer_api/main.py index 106c97d..065ee57 100644 --- a/topicer_api/main.py +++ b/topicer_api/main.py @@ -1,6 +1,21 @@ +import logging from fastapi import FastAPI +from contextlib import asynccontextmanager from topicer_api.routes import topicer_router +from topicer_api.topicers import load_topicers -app = FastAPI() + +logger = logging.getLogger(__name__) + + +@asynccontextmanager +async def lifespan(app: FastAPI): + app.state.loaded_topicers = load_topicers() + 
logger.info("TopicerAPI started and topicer configurations loaded.") + yield + logger.info("Shutting down TopicerAPI...") + + +app = FastAPI(title="TopicerAPI", lifespan=lifespan) app.include_router(topicer_router, prefix="/v1") diff --git a/topicer_api/routes/topicer_routes.py b/topicer_api/routes/topicer_routes.py index 0d54821..6bd1e8e 100644 --- a/topicer_api/routes/topicer_routes.py +++ b/topicer_api/routes/topicer_routes.py @@ -1,14 +1,15 @@ import os +import logging from typing import Sequence -from fastapi import APIRouter, HTTPException, status +from fastapi import APIRouter, HTTPException, status, Depends from fastapi.responses import RedirectResponse -from topicer import factory from topicer.schemas import DBRequest, Tag, TextChunk -from topicer_api.config import config as app_config +from topicer_api.topicers import LoadedTopicers, get_loaded_topicers +logger = logging.getLogger(__name__) topicer_router = APIRouter() @@ -18,103 +19,120 @@ async def root(): @topicer_router.get("/configs", summary="List available Topicer configurations.") -async def get_configs(): - configs = [os.path.splitext(file)[0] for file in os.listdir(app_config.TOPICER_API_CONFIGS_DIR) - if file.endswith(app_config.TOPICER_API_CONFIGS_EXTENSION)] +async def get_configs(loaded_topicers: LoadedTopicers = Depends(get_loaded_topicers)): + configs = list(loaded_topicers.keys()) return configs @topicer_router.post("/topics/discover/texts/sparse", summary="Discover topics in provided texts using sparse approach.") -async def discover_topics_sparse(config_name: str, texts: Sequence[TextChunk], n: int | None = None): - config_path = str(os.path.join(app_config.TOPICER_API_CONFIGS_DIR, config_name + app_config.TOPICER_API_CONFIGS_EXTENSION)) - if not os.path.exists(config_path): +async def discover_topics_sparse(config_name: str, texts: Sequence[TextChunk], n: int | None = None, loaded_topicers: LoadedTopicers = Depends(get_loaded_topicers)): + if config_name not in loaded_topicers: + 
logger.warning(f"Config {config_name} not found among loaded topicers: {list(loaded_topicers.keys())}") raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=f"Config {config_name} not found.") - topicer_model = factory(config_path) + topicer_model = loaded_topicers[config_name] + logger.info(f"Using topicer config: {config_name}") try: result = await topicer_model.discover_topics_sparse(texts=texts, n=n) + logger.info(f"Successfully discovered topics in texts using sparse method and config: {config_name}") except NotImplementedError: + logger.warning(f"Sparse topic discovery in texts not implemented for config: {config_name}") raise HTTPException(status_code=status.HTTP_409_CONFLICT, detail=f"Method not applicable to {config_name}.") return result @topicer_router.post("/topics/discover/texts/dense", summary="Discover topics in provided texts using dense approach.") -async def discover_topics_dense(config_name: str, texts: Sequence[TextChunk], n: int | None = None): - config_path = str(os.path.join(app_config.TOPICER_API_CONFIGS_DIR, config_name + app_config.TOPICER_API_CONFIGS_EXTENSION)) - if not os.path.exists(config_path): +async def discover_topics_dense(config_name: str, texts: Sequence[TextChunk], n: int | None = None, loaded_topicers: LoadedTopicers = Depends(get_loaded_topicers)): + if config_name not in loaded_topicers: + logger.warning(f"Config {config_name} not found among loaded topicers: {list(loaded_topicers.keys())}") raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=f"Config {config_name} not found.") - topicer_model = factory(config_path) + topicer_model = loaded_topicers[config_name] + logger.info(f"Using topicer config: {config_name}") try: result = await topicer_model.discover_topics_dense(texts=texts, n=n) + logger.info(f"Successfully discovered topics in texts using dense method and config: {config_name}") except NotImplementedError: + logger.warning(f"Dense topic discovery in texts not implemented for config: 
{config_name}") raise HTTPException(status_code=status.HTTP_409_CONFLICT, detail=f"Method not applicable to {config_name}.") return result @topicer_router.post("/topics/discover/db/sparse", summary="Discover topics in texts stored in database using sparse approach.") -async def discover_topics_in_db_sparse(config_name: str, db_request: DBRequest, n: int | None = None): - config_path = str(os.path.join(app_config.TOPICER_API_CONFIGS_DIR, config_name + app_config.TOPICER_API_CONFIGS_EXTENSION)) - if not os.path.exists(config_path): +async def discover_topics_in_db_sparse(config_name: str, db_request: DBRequest, n: int | None = None, loaded_topicers: LoadedTopicers = Depends(get_loaded_topicers)): + if config_name not in loaded_topicers: + logger.warning(f"Config {config_name} not found among loaded topicers: {list(loaded_topicers.keys())}") raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=f"Config {config_name} not found.") - topicer_model = factory(config_path) + topicer_model = loaded_topicers[config_name] + logger.info(f"Using topicer config: {config_name}") try: result = await topicer_model.discover_topics_in_db_sparse(db_request=db_request, n=n) + logger.info(f"Successfully discovered topics in DB texts using sparse method and config: {config_name}") except NotImplementedError: + logger.warning(f"Sparse topic discovery in DB texts not implemented for config: {config_name}") raise HTTPException(status_code=status.HTTP_409_CONFLICT, detail=f"Method not applicable to {config_name}.") return result @topicer_router.post("/topics/discover/db/dense", summary="Discover topics in texts stored in database using dense approach.") -async def discover_topics_in_db_dense(config_name: str, db_request: DBRequest, n: int | None = None): - config_path = str(os.path.join(app_config.TOPICER_API_CONFIGS_DIR, config_name + app_config.TOPICER_API_CONFIGS_EXTENSION)) - if not os.path.exists(config_path): +async def discover_topics_in_db_dense(config_name: str, 
db_request: DBRequest, n: int | None = None, loaded_topicers: LoadedTopicers = Depends(get_loaded_topicers)): + if config_name not in loaded_topicers: + logger.warning(f"Config {config_name} not found among loaded topicers: {list(loaded_topicers.keys())}") raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=f"Config {config_name} not found.") - topicer_model = factory(config_path) + topicer_model = loaded_topicers[config_name] + logger.info(f"Using topicer config: {config_name}") try: result = await topicer_model.discover_topics_in_db_dense(db_request=db_request, n=n) + logger.info(f"Successfully discovered topics in DB texts using dense method and config: {config_name}") except NotImplementedError: + logger.warning(f"Dense topic discovery in DB texts not implemented for config: {config_name}") raise HTTPException(status_code=status.HTTP_409_CONFLICT, detail=f"Method not applicable to {config_name}.") return result @topicer_router.post("/tags/propose/texts", summary="Propose tags on provided text chunk.") -async def propose_tags(config_name: str, text_chunk: TextChunk, tags: list[Tag]): - config_path = str(os.path.join(app_config.TOPICER_API_CONFIGS_DIR, config_name + app_config.TOPICER_API_CONFIGS_EXTENSION)) - if not os.path.exists(config_path): +async def propose_tags(config_name: str, text_chunk: TextChunk, tags: list[Tag], loaded_topicers: LoadedTopicers = Depends(get_loaded_topicers)): + if config_name not in loaded_topicers: + logger.warning(f"Config {config_name} not found among loaded topicers: {list(loaded_topicers.keys())}") raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=f"Config {config_name} not found.") - topicer_model = factory(config_path) + topicer_model = loaded_topicers[config_name] + logger.info(f"Using topicer config: {config_name}") try: result = await topicer_model.propose_tags(text_chunk=text_chunk, tags=tags) + logger.info(f"Successfully proposed tags in texts using config: {config_name}") except 
NotImplementedError: + logger.warning(f"Tag proposal in texts not implemented for config: {config_name}") raise HTTPException(status_code=status.HTTP_409_CONFLICT, detail=f"Method not applicable to {config_name}.") return result @topicer_router.post("/tags/propose/db", summary="Propose tags on texts stored in database.") -async def propose_tags_in_db(config_name: str, tag: Tag, db_request: DBRequest): - config_path = str(os.path.join(app_config.TOPICER_API_CONFIGS_DIR, config_name + app_config.TOPICER_API_CONFIGS_EXTENSION)) - if not os.path.exists(config_path): +async def propose_tags_in_db(config_name: str, tag: Tag, db_request: DBRequest, loaded_topicers: LoadedTopicers = Depends(get_loaded_topicers)): + if config_name not in loaded_topicers: + logger.warning(f"Config {config_name} not found among loaded topicers: {list(loaded_topicers.keys())}") raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=f"Config {config_name} not found.") - topicer_model = factory(config_path) + topicer_model = loaded_topicers[config_name] + logger.info(f"Using topicer config: {config_name}") try: result = await topicer_model.propose_tags_in_db(tag=tag, db_request=db_request) + logger.info(f"Successfully proposed tags in DB texts using config: {config_name}") except NotImplementedError: + logger.warning(f"Tag proposal in DB texts not implemented for config: {config_name}") raise HTTPException(status_code=status.HTTP_409_CONFLICT, detail=f"Method not applicable to {config_name}.") return result diff --git a/topicer_api/run.py b/topicer_api/run.py index 100c821..020c541 100644 --- a/topicer_api/run.py +++ b/topicer_api/run.py @@ -1,9 +1,10 @@ import uvicorn -import logging +import logging.config from topicer_api.config import config +logging.config.dictConfig(config.LOGGING_CONFIG) logger = logging.getLogger(__name__) diff --git a/topicer_api/topicers.py b/topicer_api/topicers.py new file mode 100644 index 0000000..98079ae --- /dev/null +++ b/topicer_api/topicers.py @@ 
-0,0 +1,37 @@ +import os +import logging +from fastapi import Request + +from topicer import factory as topicer_factory + +from topicer_api.config import config as app_config + + +logger = logging.getLogger(__name__) + + +class LoadedTopicers(dict): + pass + + +def load_topicers() -> LoadedTopicers: + loaded_topicers = LoadedTopicers() + + config_files = [file for file in os.listdir(app_config.TOPICER_API_CONFIGS_DIR) if + file.endswith(app_config.TOPICER_API_CONFIGS_EXTENSION)] + for config_file in config_files: + config_name = os.path.splitext(config_file)[0] + config_path = os.path.join(app_config.TOPICER_API_CONFIGS_DIR, config_file) + + try: + loaded_topicers[config_name] = topicer_factory(config_path) + except Exception as e: + logger.warning(f"Failed to load topicer config '{config_name}' from '{config_path}': {e}") + + logger.info(f"Loaded topicer configurations: {list(loaded_topicers.keys())}") + + return loaded_topicers + + +def get_loaded_topicers(request: Request) -> LoadedTopicers: + return request.app.state.loaded_topicers