Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion topicer_api/client/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,9 @@ def process_response(response: dict, args) -> None:
print(f"Tag: {tag_span_proposal.tag.name}")
print(f"Span: ({tag_span_proposal.span_start}, {tag_span_proposal.span_end})")
print(f"Proposed text: '{result.text[tag_span_proposal.span_start:tag_span_proposal.span_end]}'")
print(f"Confidence: {tag_span_proposal.confidence:.4f}")

if tag_span_proposal.confidence is not None:
print(f"Confidence: {tag_span_proposal.confidence:.4f}")

if i < len(results) - 1 or j < len(result.tag_span_proposals) - 1:
print("-----")
Expand Down
19 changes: 19 additions & 0 deletions topicer_api/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,25 @@ def __init__(self):
self.TOPICER_API_CONFIGS_DIR = os.getenv("TOPICER_API_CONFIGS_DIR", "./configs")
self.TOPICER_API_CONFIGS_EXTENSION = os.getenv("TOPICER_API_CONFIGS_EXTENSION", ".yaml")

self.LOGGING_CONFIG = {
"version": 1,
"formatters": {
"default": {
"format": "%(levelname)s|%(asctime)s|%(filename)s:%(name)s: %(message)s",
},
},
"handlers": {
"console": {
"class": "logging.StreamHandler",
"formatter": "default",
},
},
"root": {
"level": "INFO",
"handlers": ["console"],
},
}

@staticmethod
def _env_bool(key: str, default: bool = False) -> bool:
val = os.getenv(key)
Expand Down
17 changes: 16 additions & 1 deletion topicer_api/main.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,21 @@
import logging
from fastapi import FastAPI
from contextlib import asynccontextmanager

from topicer_api.routes import topicer_router
from topicer_api.topicers import load_topicers

# Module-level logger for this module. The FastAPI application object is
# created exactly once further down, after the lifespan handler is defined,
# so no placeholder application is instantiated here.
logger = logging.getLogger(__name__)


@asynccontextmanager
async def lifespan(app: FastAPI):
    """Load topicers when the application starts and log when it shuts down.

    The result of ``load_topicers()`` is stored on
    ``app.state.loaded_topicers`` before control is yielded back to the
    application.

    Args:
        app: The FastAPI application whose state receives the loaded topicers.
    """
    app.state.loaded_topicers = load_topicers()
    logger.info("TopicerAPI started and topicer configurations loaded.")
    yield
    # Use the module logger (not the root-level ``logging.info``) so the
    # shutdown record carries this module's logger name, like the startup one.
    logger.info("Shutting down TopicerAPI...")


# Create the application with the lifespan handler defined above and mount
# all Topicer routes under the "/v1" prefix.
app = FastAPI(title="TopicerAPI", lifespan=lifespan)
app.include_router(topicer_router, prefix="/v1")
78 changes: 48 additions & 30 deletions topicer_api/routes/topicer_routes.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,15 @@
import os
import logging
from typing import Sequence
from fastapi import APIRouter, HTTPException, status
from fastapi import APIRouter, HTTPException, status, Depends
from fastapi.responses import RedirectResponse

from topicer import factory
from topicer.schemas import DBRequest, Tag, TextChunk

from topicer_api.config import config as app_config
from topicer_api.topicers import LoadedTopicers, get_loaded_topicers


logger = logging.getLogger(__name__)
topicer_router = APIRouter()


Expand All @@ -18,103 +19,120 @@ async def root():


@topicer_router.get("/configs", summary="List available Topicer configurations.")
async def get_configs(loaded_topicers: LoadedTopicers = Depends(get_loaded_topicers)):
    """Return the names of the currently loaded topicer configurations.

    Args:
        loaded_topicers: Mapping of configuration name to topicer, injected
            through the ``get_loaded_topicers`` dependency.

    Returns:
        A list with one entry per key of ``loaded_topicers``.
    """
    return [config_name for config_name in loaded_topicers.keys()]


@topicer_router.post("/topics/discover/texts/sparse", summary="Discover topics in provided texts using sparse approach.")
async def discover_topics_sparse(
    config_name: str,
    texts: Sequence[TextChunk],
    n: int | None = None,
    loaded_topicers: LoadedTopicers = Depends(get_loaded_topicers),
):
    """Discover topics in the provided texts using a loaded topicer's sparse method.

    Args:
        config_name: Name of the loaded topicer configuration to use.
        texts: Text chunks forwarded to the topicer as ``texts``.
        n: Optional value forwarded unchanged to the topicer as ``n``.
        loaded_topicers: Mapping of configuration name to topicer, injected
            through the ``get_loaded_topicers`` dependency.

    Returns:
        Whatever the topicer's ``discover_topics_sparse`` coroutine returns.

    Raises:
        HTTPException: 404 when ``config_name`` is not among the loaded
            topicers; 409 when the topicer raises ``NotImplementedError``.
    """
    if config_name not in loaded_topicers:
        logger.warning(f"Config {config_name} not found among loaded topicers: {list(loaded_topicers.keys())}")
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=f"Config {config_name} not found.")

    topicer_model = loaded_topicers[config_name]
    logger.info(f"Using topicer config: {config_name}")

    try:
        result = await topicer_model.discover_topics_sparse(texts=texts, n=n)
    except NotImplementedError as exc:
        logger.warning(f"Sparse topic discovery in texts not implemented for config: {config_name}")
        # Chain explicitly so the original error is kept as __cause__ instead
        # of appearing as an unrelated "during handling" exception.
        raise HTTPException(status_code=status.HTTP_409_CONFLICT, detail=f"Method not applicable to {config_name}.") from exc

    # Logged outside the try block so it only guards the topicer call itself.
    logger.info(f"Successfully discovered topics in texts using sparse method and config: {config_name}")
    return result


@topicer_router.post("/topics/discover/texts/dense", summary="Discover topics in provided texts using dense approach.")
async def discover_topics_dense(
    config_name: str,
    texts: Sequence[TextChunk],
    n: int | None = None,
    loaded_topicers: LoadedTopicers = Depends(get_loaded_topicers),
):
    """Discover topics in the provided texts using a loaded topicer's dense method.

    Args:
        config_name: Name of the loaded topicer configuration to use.
        texts: Text chunks forwarded to the topicer as ``texts``.
        n: Optional value forwarded unchanged to the topicer as ``n``.
        loaded_topicers: Mapping of configuration name to topicer, injected
            through the ``get_loaded_topicers`` dependency.

    Returns:
        Whatever the topicer's ``discover_topics_dense`` coroutine returns.

    Raises:
        HTTPException: 404 when ``config_name`` is not among the loaded
            topicers; 409 when the topicer raises ``NotImplementedError``.
    """
    if config_name not in loaded_topicers:
        logger.warning(f"Config {config_name} not found among loaded topicers: {list(loaded_topicers.keys())}")
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=f"Config {config_name} not found.")

    topicer_model = loaded_topicers[config_name]
    logger.info(f"Using topicer config: {config_name}")

    try:
        result = await topicer_model.discover_topics_dense(texts=texts, n=n)
    except NotImplementedError as exc:
        logger.warning(f"Dense topic discovery in texts not implemented for config: {config_name}")
        # Chain explicitly so the original error is kept as __cause__ instead
        # of appearing as an unrelated "during handling" exception.
        raise HTTPException(status_code=status.HTTP_409_CONFLICT, detail=f"Method not applicable to {config_name}.") from exc

    # Logged outside the try block so it only guards the topicer call itself.
    logger.info(f"Successfully discovered topics in texts using dense method and config: {config_name}")
    return result


@topicer_router.post("/topics/discover/db/sparse", summary="Discover topics in texts stored in database using sparse approach.")
async def discover_topics_in_db_sparse(
    config_name: str,
    db_request: DBRequest,
    n: int | None = None,
    loaded_topicers: LoadedTopicers = Depends(get_loaded_topicers),
):
    """Discover topics in database texts using a loaded topicer's sparse method.

    Args:
        config_name: Name of the loaded topicer configuration to use.
        db_request: Request object forwarded to the topicer as ``db_request``.
        n: Optional value forwarded unchanged to the topicer as ``n``.
        loaded_topicers: Mapping of configuration name to topicer, injected
            through the ``get_loaded_topicers`` dependency.

    Returns:
        Whatever the topicer's ``discover_topics_in_db_sparse`` coroutine returns.

    Raises:
        HTTPException: 404 when ``config_name`` is not among the loaded
            topicers; 409 when the topicer raises ``NotImplementedError``.
    """
    if config_name not in loaded_topicers:
        logger.warning(f"Config {config_name} not found among loaded topicers: {list(loaded_topicers.keys())}")
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=f"Config {config_name} not found.")

    topicer_model = loaded_topicers[config_name]
    logger.info(f"Using topicer config: {config_name}")

    try:
        result = await topicer_model.discover_topics_in_db_sparse(db_request=db_request, n=n)
    except NotImplementedError as exc:
        logger.warning(f"Sparse topic discovery in DB texts not implemented for config: {config_name}")
        # Chain explicitly so the original error is kept as __cause__ instead
        # of appearing as an unrelated "during handling" exception.
        raise HTTPException(status_code=status.HTTP_409_CONFLICT, detail=f"Method not applicable to {config_name}.") from exc

    # Logged outside the try block so it only guards the topicer call itself.
    logger.info(f"Successfully discovered topics in DB texts using sparse method and config: {config_name}")
    return result


@topicer_router.post("/topics/discover/db/dense", summary="Discover topics in texts stored in database using dense approach.")
async def discover_topics_in_db_dense(
    config_name: str,
    db_request: DBRequest,
    n: int | None = None,
    loaded_topicers: LoadedTopicers = Depends(get_loaded_topicers),
):
    """Discover topics in database texts using a loaded topicer's dense method.

    Args:
        config_name: Name of the loaded topicer configuration to use.
        db_request: Request object forwarded to the topicer as ``db_request``.
        n: Optional value forwarded unchanged to the topicer as ``n``.
        loaded_topicers: Mapping of configuration name to topicer, injected
            through the ``get_loaded_topicers`` dependency.

    Returns:
        Whatever the topicer's ``discover_topics_in_db_dense`` coroutine returns.

    Raises:
        HTTPException: 404 when ``config_name`` is not among the loaded
            topicers; 409 when the topicer raises ``NotImplementedError``.
    """
    if config_name not in loaded_topicers:
        logger.warning(f"Config {config_name} not found among loaded topicers: {list(loaded_topicers.keys())}")
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=f"Config {config_name} not found.")

    topicer_model = loaded_topicers[config_name]
    logger.info(f"Using topicer config: {config_name}")

    try:
        result = await topicer_model.discover_topics_in_db_dense(db_request=db_request, n=n)
    except NotImplementedError as exc:
        logger.warning(f"Dense topic discovery in DB texts not implemented for config: {config_name}")
        # Chain explicitly so the original error is kept as __cause__ instead
        # of appearing as an unrelated "during handling" exception.
        raise HTTPException(status_code=status.HTTP_409_CONFLICT, detail=f"Method not applicable to {config_name}.") from exc

    # Logged outside the try block so it only guards the topicer call itself.
    logger.info(f"Successfully discovered topics in DB texts using dense method and config: {config_name}")
    return result


@topicer_router.post("/tags/propose/texts", summary="Propose tags on provided text chunk.")
async def propose_tags(
    config_name: str,
    text_chunk: TextChunk,
    tags: list[Tag],
    loaded_topicers: LoadedTopicers = Depends(get_loaded_topicers),
):
    """Propose tags on a provided text chunk using a loaded topicer.

    Args:
        config_name: Name of the loaded topicer configuration to use.
        text_chunk: Text chunk forwarded to the topicer as ``text_chunk``.
        tags: Tags forwarded to the topicer as ``tags``.
        loaded_topicers: Mapping of configuration name to topicer, injected
            through the ``get_loaded_topicers`` dependency.

    Returns:
        Whatever the topicer's ``propose_tags`` coroutine returns.

    Raises:
        HTTPException: 404 when ``config_name`` is not among the loaded
            topicers; 409 when the topicer raises ``NotImplementedError``.
    """
    if config_name not in loaded_topicers:
        logger.warning(f"Config {config_name} not found among loaded topicers: {list(loaded_topicers.keys())}")
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=f"Config {config_name} not found.")

    topicer_model = loaded_topicers[config_name]
    logger.info(f"Using topicer config: {config_name}")

    try:
        result = await topicer_model.propose_tags(text_chunk=text_chunk, tags=tags)
    except NotImplementedError as exc:
        logger.warning(f"Tag proposal in texts not implemented for config: {config_name}")
        # Chain explicitly so the original error is kept as __cause__ instead
        # of appearing as an unrelated "during handling" exception.
        raise HTTPException(status_code=status.HTTP_409_CONFLICT, detail=f"Method not applicable to {config_name}.") from exc

    # Logged outside the try block so it only guards the topicer call itself.
    logger.info(f"Successfully proposed tags in texts using config: {config_name}")
    return result


@topicer_router.post("/tags/propose/db", summary="Propose tags on texts stored in database.")
async def propose_tags_in_db(
    config_name: str,
    tag: Tag,
    db_request: DBRequest,
    loaded_topicers: LoadedTopicers = Depends(get_loaded_topicers),
):
    """Propose a tag on texts stored in the database using a loaded topicer.

    Args:
        config_name: Name of the loaded topicer configuration to use.
        tag: Tag forwarded to the topicer as ``tag``.
        db_request: Request object forwarded to the topicer as ``db_request``.
        loaded_topicers: Mapping of configuration name to topicer, injected
            through the ``get_loaded_topicers`` dependency.

    Returns:
        Whatever the topicer's ``propose_tags_in_db`` coroutine returns.

    Raises:
        HTTPException: 404 when ``config_name`` is not among the loaded
            topicers; 409 when the topicer raises ``NotImplementedError``.
    """
    if config_name not in loaded_topicers:
        logger.warning(f"Config {config_name} not found among loaded topicers: {list(loaded_topicers.keys())}")
        raise HTTPException(status_code=status.HTTP_404_NOT_FOUND, detail=f"Config {config_name} not found.")

    topicer_model = loaded_topicers[config_name]
    logger.info(f"Using topicer config: {config_name}")

    try:
        result = await topicer_model.propose_tags_in_db(tag=tag, db_request=db_request)
    except NotImplementedError as exc:
        logger.warning(f"Tag proposal in DB texts not implemented for config: {config_name}")
        # Chain explicitly so the original error is kept as __cause__ instead
        # of appearing as an unrelated "during handling" exception.
        raise HTTPException(status_code=status.HTTP_409_CONFLICT, detail=f"Method not applicable to {config_name}.") from exc

    # Logged outside the try block so it only guards the topicer call itself.
    logger.info(f"Successfully proposed tags in DB texts using config: {config_name}")
    return result
3 changes: 2 additions & 1 deletion topicer_api/run.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
import uvicorn
import logging
import logging.config

from topicer_api.config import config


# Apply the logging configuration from the application config before this
# module's logger is obtained.
logging.config.dictConfig(config.LOGGING_CONFIG)
logger = logging.getLogger(__name__)


Expand Down
37 changes: 37 additions & 0 deletions topicer_api/topicers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
import os
import logging
from fastapi import Request

from topicer import factory as topicer_factory

from topicer_api.config import config as app_config


logger = logging.getLogger(__name__)


class LoadedTopicers(dict):
    """Plain ``dict`` subclass holding loaded topicers keyed by configuration name.

    It adds no behaviour of its own; it exists as a distinct named type for
    the loaded-topicer mapping.
    """


def load_topicers() -> LoadedTopicers:
    """Instantiate a topicer for every configuration file in the configs directory.

    Every entry of ``app_config.TOPICER_API_CONFIGS_DIR`` whose name ends with
    ``app_config.TOPICER_API_CONFIGS_EXTENSION`` is passed to the topicer
    factory. A configuration that fails to load is logged and skipped, so one
    broken file does not prevent the remaining ones from loading.

    Returns:
        Mapping from configuration name (file name without its extension) to
        the object returned by the topicer factory.

    Raises:
        OSError: Propagated from ``os.listdir`` when the configs directory
            cannot be listed.
    """
    loaded_topicers = LoadedTopicers()
    configs_dir = app_config.TOPICER_API_CONFIGS_DIR
    extension = app_config.TOPICER_API_CONFIGS_EXTENSION

    # os.listdir returns entries in arbitrary order; sorting makes the load
    # order (and therefore the key order of the mapping) deterministic.
    for config_file in sorted(os.listdir(configs_dir)):
        if not config_file.endswith(extension):
            continue

        config_name = os.path.splitext(config_file)[0]
        config_path = os.path.join(configs_dir, config_file)

        try:
            loaded_topicers[config_name] = topicer_factory(config_path)
        except Exception as e:
            # Deliberately best-effort: skip the broken config, but keep the
            # traceback in the log so the failure can actually be diagnosed.
            logger.warning(
                f"Failed to load topicer config '{config_name}' from '{config_path}': {e}",
                exc_info=True,
            )

    logger.info(f"Loaded topicer configurations: {list(loaded_topicers.keys())}")

    return loaded_topicers


def get_loaded_topicers(request: Request) -> LoadedTopicers:
    """Return the loaded topicers stored on the application state.

    Args:
        request: Incoming request; its ``app.state.loaded_topicers``
            attribute is read.

    Returns:
        The object stored at ``request.app.state.loaded_topicers``.
    """
    app_state = request.app.state
    return app_state.loaded_topicers