diff --git a/medcat-service/README.md b/medcat-service/README.md index d8d0bc1da..4b726ae0f 100644 --- a/medcat-service/README.md +++ b/medcat-service/README.md @@ -389,3 +389,17 @@ The main settings that can be used to improve the performance when querying larg MedCAT parameters are defined in selected `envs/medcat*` file. For details on available MedCAT parameters please refer to [the official GitHub repository](https://github.com/CogStack/cogstack-nlp/blob/main/medcat-v2/). + +## Local development + +For local development, set up a Python virtual environment, install dependencies with pip, and make sure to also install the local MedCAT core library (the `medcat-v2` folder) in editable mode. + +```bash +python -m venv .venv +source .venv/bin/activate +pip install -r requirements.txt -r requirements-dev.txt +SETUPTOOLS_SCM_PRETEND_VERSION="2.4.0-dev0" pip install -e "../medcat-v2[meta-cat,spacy]" +bash start_service_debug.sh + +# Service will run on localhost:8000 +``` \ No newline at end of file diff --git a/medcat-service/medcat_service/config.py b/medcat-service/medcat_service/config.py index ee12a3601..febb84469 100644 --- a/medcat-service/medcat_service/config.py +++ b/medcat-service/medcat_service/config.py @@ -38,7 +38,7 @@ class Settings(BaseSettings): ) app_root_path: str = Field( - default="/", + default="", description="The Root Path for the FastAPI App", examples=["/medcat-service"], ) diff --git a/medcat-service/medcat_service/demo/demo_content.py b/medcat-service/medcat_service/demo/demo_content.py new file mode 100644 index 000000000..3092443b9 --- /dev/null +++ b/medcat-service/medcat_service/demo/demo_content.py @@ -0,0 +1,121 @@ + +short_example = "John had been diagnosed with acute Kidney Failure the week before" + + +long_example = """Description: Intracerebral hemorrhage (very acute clinical changes occurred immediately). +CC: Left hand numbness on presentation; then developed lethargy later that day. + +HX: On the day of presentation, this 72 y/o RHM suddenly developed generalized weakness and lightheadedness, and could not rise from a chair. Four hours later he experienced sudden left hand numbness lasting two hours. There were no other associated symptoms except for the generalized weakness and lightheadedness. He denied vertigo. + +He had been experiencing falling spells without associated LOC up to several times a month for the past year. + +MEDS: procardia SR, Lasix, Ecotrin, KCL, Digoxin, Colace, Coumadin. + +PMH: 1)8/92 evaluation for presyncope (Echocardiogram showed: AV fibrosis/calcification, AV stenosis/insufficiency, MV stenosis with annular calcification and regurgitation, moderate TR, Decreased LV systolic function, severe LAE. MRI brain: focal areas of increased T2 signal in the left cerebellum and in the brainstem probably representing microvascular ischemic disease. IVG (MUGA scan)revealed: global hypokinesis of the LV and biventricular dysfunction, RV ejection Fx 45% and LV ejection Fx 39%. He was subsequently placed on coumadin severe valvular heart disease), 2)HTN, 3)Rheumatic fever and heart disease, 4)COPD, 5)ETOH abuse, 6)colonic polyps, 7)CAD, 8)CHF, 9)Appendectomy, 10)Junctional tachycardia. +""" # noqa: E501 + +article_footer = """ +## Disclaimer +This software is intended solely for the testing purposes and non-commercial use. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED. + +contact@cogstack.com for more information. + +Please note this is a limited version of MedCAT and it is not trained or validated by clinicans. +""" # noqa: E501 + +anoncat_example = """Patient Information: + +Name: John Parkinson +Date of Birth: February 12, 1958 +Gender: Male +Address: 789 Wellness Lane, Healthville, HV 56789 +Phone: (555) 555-1234 +Email: john.parkinson@email.com +Emergency Contact: + +Name: Mary Parkinson +Relationship: Spouse +Phone: (555) 555-5678 +Insurance Information: + +Insurance Provider: HealthWell Assurance +Policy Number: HW765432109 +Group Number: G876543 +Medical History: + +Allergies: + +None reported +Medications: + +Levodopa/Carbidopa for Parkinson's disease symptoms +Pramipexole for restless legs syndrome +Lisinopril for hypertension +Atorvastatin for hyperlipidemia +Metformin for Type 2 Diabetes +Medical Conditions: + +Parkinson's Disease (diagnosed on June 20, 2015) +Hypertension +Hyperlipidemia +Type 2 Diabetes +Osteoarthritis +Vital Signs: + +Blood Pressure: 130/80 mmHg +Heart Rate: 72 bpm +Temperature: 98.4°F +Respiratory Rate: 18 breaths per minute +Recent Inpatient Stay (Dates: September 1-10, 2023): + +Reason for Admission: Acute exacerbation of Parkinson's symptoms, pneumonia, and uncontrolled diabetes. + +Interventions: + +Neurology Consultation for Parkinson's disease management adjustments. +Antibiotic therapy for pneumonia. +Continuous glucose monitoring and insulin therapy for diabetes control. +Physical therapy sessions to maintain mobility. +Complications: + +Delirium managed with close monitoring and appropriate interventions. +Discharge Plan: + +Medication adjustments for Parkinson's disease. +Follow-up appointments with neurologist, endocrinologist, and primary care. +Home health care for continued physical therapy. +Follow-up Visits: + +Date: October 15, 2023 + +Reason for Visit: Post-discharge Follow-up +Notes: Stable Parkinson's symptoms, pneumonia resolved. Adjusted diabetes medications for better control. +Date: December 5, 2023 + +Reason for Visit: Neurology Follow-up +Notes: Fine-tuned Parkinson's medication regimen. Recommended ongoing physical therapy. +""" # noqa: E501 + +anoncat_help_content = """Demo app for the deidentification of private health information using the CogStack AnonCAT model + +Please DO NOT test with any real sensitive PHI data. + +Local validation and fine-tuning available via [MedCATtrainer]( +https://github.com/CogStack/cogstack-nlp/tree/main/medcat-trainer). +Email us, [contact@cogstack.org](mailto:contact@cogstack.org), to discuss model access, +model performance, and your use case. + +The following PHI items have been trained: + +| PHI Item | Description | +|----------|-------------| +| NHS Number | UK National Health Service Numbers. | +| Name | All names, first, middle, last of patients, relatives, care providers etc. Importantly, does not redact conditions that are named after a name, e.g. "Parkinsons's disease". | +| Date of Birth | DOBs. Does not include other dates that may be in the record, i.e. dates of visit etc. | +| Hospital Number | A unique number provided by the hospital. Distinct from the NHS number | +| Address Line | Address lines - first, second, third or fourth | +| Postcode | UK postal codes - 6 or 7 alphanumeric codes as part of addresses | +| Telephone Number | Telephone numbers, extensions, mobile / cell phone numbers | +| Email | Email addresses | +| Initials | Patient, relatives, care provider name initials. | +""" # noqa: E501 diff --git a/medcat-service/medcat_service/demo/demo_logic.py b/medcat-service/medcat_service/demo/demo_logic.py new file mode 100644 index 000000000..87e8479ed --- /dev/null +++ b/medcat-service/medcat_service/demo/demo_logic.py @@ -0,0 +1,153 @@ +""" +This module provides conversion utilities between the MedCAT output format +and the exact format expected by Gradio components, specifically aligning +with the output schema of Hugging Face Transformers pipelines (e.g., for +NER highlighting). Use these definitions and helper functions to bridge +MedCAT's annotation results and Gradio's interactive demo expectations. +""" + +import logging + +from pydantic import BaseModel + +from medcat_service.dependencies import get_medcat_processor, get_settings +from medcat_service.types import ProcessAPIInputContent +from medcat_service.types_entities import Entity + +logger = logging.getLogger(__name__) + + +class EntityAnnotation(BaseModel): + """ + Expected data format for NER in gradio + """ + + entity: str + score: float + index: int + word: str + start: int + end: int + + +headers = ["Pretty Name", "Identifier", "Confidence Score", "Start Index", "End Index", "ID"] + + +class EntityAnnotationDisplay(BaseModel): + """ + DIsplay data format for use in a datatable + """ + + pretty_name: str + identifier: str + score: float + start: int + end: int + id: int + # Misisng Meta Anns + + +class EntityResponse(BaseModel): + """ + Expected data format of gradio highlightedtext component + """ + + entities: list[EntityAnnotation] + text: str + + +def convert_annotation_to_ner_model(entity: Entity, index: int) -> EntityAnnotation: + return EntityAnnotation( + entity=entity.get("cui", "UNKNOWN"), + score=entity.get("acc", 0.0), + index=index, + word=entity.get("detected_name", ""), + start=entity.get("start", -1), + end=entity.get("end", -1), + ) + + +def convert_annotation_to_display_model(entity: Entity) -> EntityAnnotationDisplay: + return EntityAnnotationDisplay( + pretty_name=entity.get("pretty_name", ""), + identifier=entity.get("cui", "UNKNOWN"), + score=entity.get("acc", 0.0), + start=entity.get("start", -1), + end=entity.get("end", -1), + id=entity.get("id", -1), + # medcat-demo-app/webapp/demo/views.py + # if key == 'meta_anns': + # meta_anns=ent.get("meta_anns", {}) + # if meta_anns: + # for meta_ann in meta_anns.keys(): + # new_ent[meta_ann]=meta_anns[meta_ann]['value'] + ) + + +def convert_entity_dict_to_annotations(entity_dict_list: list[dict[str, Entity]]) -> list[EntityAnnotation]: + annotations: list[EntityAnnotation] = [] + for entity_dict in entity_dict_list: + for key, entity in entity_dict.items(): + annotations.append(convert_annotation_to_ner_model(entity, index=int(key))) + return annotations + + +def convert_entity_dict_to_display_model(entity_dict_list: list[dict[str, Entity]]) -> list[EntityAnnotationDisplay]: + logger.debug("Converting entity dict to display model") annotations: list[EntityAnnotationDisplay] = [] + for entity_dict in entity_dict_list: + for key, entity in entity_dict.items(): + annotations.append(convert_annotation_to_display_model(entity)) + return annotations + + +def convert_display_model_to_list_of_lists(entity_display_model: list[EntityAnnotationDisplay]) -> list[list[str]]: + return [ + [str(getattr(entity, field)) for field in EntityAnnotationDisplay.model_fields] + for entity in entity_display_model + ] + + +def perform_named_entity_resolution(input_text: str): + """ + Performs clinical coding by processing the input text with MedCAT to extract and + annotate medical concepts (entities). + + Returns: + 1. A dictionary following the NER response model (EntityResponse), containing the original text + and the list of detected entities. + 2. A datatable-compatible list of lists, where each sublist represents an entity annotation and + its attributes for display purposes. + + This method is used as the main function for the Gradio MedCAT demo and MCP server, + enabling users to input free text and receive automatic annotation and coding of clinical entities. + + Args: + input_text (str): The input text to be processed and annotated for medical entities by MedCAT. + + Returns: + Tuple: + - dict: A dictionary following the NER response model (EntityResponse), containing the + original text and the list of detected entities. + - list[list[str]]: A datatable-compatible list of lists, where each sublist represents an + entity annotation and its attributes for display purposes. + + """ + logger.debug("Performing named entity resolution") + if not input_text or not input_text.strip(): + return None, None + + processor = get_medcat_processor(get_settings()) + input = ProcessAPIInputContent(text=input_text) + + result = processor.process_content(input.model_dump()) + + entity_ner_format: list[EntityAnnotation] = convert_entity_dict_to_annotations(result.annotations) + + logger.debug("Converting entity dict to display model") + annotations_as_display_format = convert_entity_dict_to_display_model(result.annotations) + response_datatable_format = convert_display_model_to_list_of_lists(annotations_as_display_format) + + response: EntityResponse = EntityResponse(entities=entity_ner_format, text=input_text) + result = response.model_dump(), response_datatable_format + logger.debug("Returning final result") + return result diff --git a/medcat-service/medcat_service/demo/gradio_demo.py b/medcat-service/medcat_service/demo/gradio_demo.py index a19bbab92..f9efc82fe 100644 --- a/medcat-service/medcat_service/demo/gradio_demo.py +++ b/medcat-service/medcat_service/demo/gradio_demo.py @@ -1,174 +1,91 @@ import gradio as gr -from pydantic import BaseModel - -from medcat_service.dependencies import get_medcat_processor, get_settings -from medcat_service.types import ProcessAPIInputContent -from medcat_service.types_entities import Entity - - -class EntityAnnotation(BaseModel): - """ - Expected data format for NER in gradio - """ - - entity: str - score: float - index: int - word: str - start: int - end: int +import medcat_service.demo.demo_content as demo_content +from medcat_service.demo.demo_logic import perform_named_entity_resolution +from medcat_service.dependencies import get_settings headers = ["Pretty Name", "Identifier", "Confidence Score", "Start Index", "End Index", "ID"] - -class EntityAnnotationDisplay(BaseModel): - """ - DIsplay data format for use in a datatable - """ - - pretty_name: str - identifier: str - score: float - start: int - end: int - id: int - # Misisng Meta Anns - - -class EntityResponse(BaseModel): - """ - Expected data format of gradio highlightedtext component - """ - - entities: list[EntityAnnotation] - text: str - - -def convert_annotation_to_ner_model(entity: Entity, index: int) -> EntityAnnotation: - return EntityAnnotation( - entity=entity.get("cui", "UNKNOWN"), - score=entity.get("acc", 0.0), - index=index, - word=entity.get("detected_name", ""), - start=entity.get("start", -1), - end=entity.get("end", -1), - ) - - -def convert_annotation_to_display_model(entity: Entity) -> EntityAnnotationDisplay: - return EntityAnnotationDisplay( - pretty_name=entity.get("pretty_name", ""), - identifier=entity.get("cui", "UNKNOWN"), - score=entity.get("acc", 0.0), - start=entity.get("start", -1), - end=entity.get("end", -1), - id=entity.get("id", -1), - # medcat-demo-app/webapp/demo/views.py - # if key == 'meta_anns': - # meta_anns=ent.get("meta_anns", {}) - # if meta_anns: - # for meta_ann in meta_anns.keys(): - # new_ent[meta_ann]=meta_anns[meta_ann]['value'] - ) - - -def convert_entity_dict_to_annotations(entity_dict_list: list[dict[str, Entity]]) -> list[EntityAnnotation]: - annotations: list[EntityAnnotation] = [] - for entity_dict in entity_dict_list: - for key, entity in entity_dict.items(): - annotations.append(convert_annotation_to_ner_model(entity, index=int(key))) - return annotations - - -def convert_entity_dict_to_display_model(entity_dict_list: list[dict[str, Entity]]) -> list[EntityAnnotationDisplay]: - annotations: list[EntityAnnotationDisplay] = [] - for entity_dict in entity_dict_list: - for key, entity in entity_dict.items(): - annotations.append(convert_annotation_to_display_model(entity)) - return annotations - - -def convert_display_model_to_list_of_lists(entity_display_model: list[EntityAnnotationDisplay]) -> list[list[str]]: - return [[str(getattr(entity, field)) for field in entity.model_fields] for entity in entity_display_model] - - -def perform_named_entity_resolution(input_text: str): +# CSS to set max height with scrollbar for HighlightedText output +# Target the component container and its content +highlighted_text_css = """ +#highlighted-text-output { + max-height: 460px; + overflow-y: auto; +} +""" +settings = get_settings() + +if settings.deid_mode: + with gr.Blocks(title="AnonCAT Demo", fill_width=True) as io: + gr.Markdown("# AnonCAT Demo") + with gr.Row(): + with gr.Column(): + input_text = gr.Textbox( + label="Input Text", + lines=3, + placeholder="Enter some text and click Deidentify..." + ) + examples = gr.Examples( + examples=[demo_content.short_example, demo_content.anoncat_example], + inputs=input_text, + ) + with gr.Row(): + clear_btn = gr.Button("Clear", variant="secondary") + deid_btn = gr.Button("Deidentify", variant="primary") + + with gr.Column(): + highlighted = gr.HighlightedText(label="Processed Text", elem_id="highlighted-text-output") + dataframe = gr.Dataframe(label="Annotations", headers=headers, interactive=False, max_chars=4) + deid_btn.click( + perform_named_entity_resolution, + inputs=input_text, + outputs=[highlighted, dataframe] + ) + clear_btn.click( + lambda: ("", None, None), + outputs=[input_text, highlighted, dataframe] + ) + gr.Markdown(demo_content.anoncat_help_content) +else: + with gr.Blocks(title="MedCAT Demo", fill_width=True) as io: + gr.Markdown("# MedCAT Demo") + with gr.Row(): + with gr.Column(): + input_text = gr.Textbox( + label="Input Text", + lines=6, + placeholder="Enter some text and click Annotate..." + ) + examples = gr.Examples( + examples=[demo_content.short_example, demo_content.long_example], + inputs=input_text, + ) + with gr.Row(): + clear_btn = gr.Button("Clear", variant="secondary") + annotate_btn = gr.Button("Annotate", variant="primary") + with gr.Column(): + highlighted = gr.HighlightedText(label="Processed Text", elem_id="highlighted-text-output") + dataframe = gr.Dataframe(label="Annotations", headers=headers, interactive=False, max_chars=50) + annotate_btn.click( + perform_named_entity_resolution, + inputs=input_text, + outputs=[highlighted, dataframe] + ) + clear_btn.click( + lambda: ("", None, None), + outputs=[input_text, highlighted, dataframe] + ) + gr.Markdown(demo_content.article_footer) + + +def mount_gradio_app(app, path: str = "/demo") -> None: """ - Performs clinical coding by processing the input text with MedCAT to extract and - annotate medical concepts (entities). - - Returns: - 1. A dictionary following the NER response model (EntityResponse), containing the original text - and the list of detected entities. - 2. A datatable-compatible list of lists, where each sublist represents an entity annotation and - its attributes for display purposes. - - This method is used as the main function for the Gradio MedCAT demo and MCP server, - enabling users to input free text and receive automatic annotation and coding of clinical entities. + Mount the Gradio interface to the FastAPI app with a custom theme. Args: - input_text (str): The input text to be processed and annotated for medical entities by MedCAT. - - Returns: - Tuple: - - dict: A dictionary following the NER response model (EntityResponse), containing the - original text and the list of detected entities. - - list[list[str]]: A datatable-compatible list of lists, where each sublist represents an - entity annotation and its attributes for display purposes. - + app: The FastAPI application instance + path: The path at which to mount the Gradio app (default: "/demo") """ - - processor = get_medcat_processor(get_settings()) - input = ProcessAPIInputContent(text=input_text) - - result = processor.process_content(input.model_dump()) - - entity_ner_format: list[EntityAnnotation] = convert_entity_dict_to_annotations(result.annotations) - - annotations_as_display_format = convert_entity_dict_to_display_model(result.annotations) - response_datatable_format = convert_display_model_to_list_of_lists(annotations_as_display_format) - - response: EntityResponse = EntityResponse(entities=entity_ner_format, text=input_text) - return response.model_dump(), response_datatable_format - - -short_example = "John had been diagnosed with acute Kidney Failure the week before" - - -long_example = """ -Description: Intracerebral hemorrhage (very acute clinical changes occurred immediately). -CC: Left hand numbness on presentation; then developed lethargy later that day. - -HX: On the day of presentation, this 72 y/o RHM suddenly developed generalized weakness and lightheadedness, and could not rise from a chair. Four hours later he experienced sudden left hand numbness lasting two hours. There were no other associated symptoms except for the generalized weakness and lightheadedness. He denied vertigo. - -He had been experiencing falling spells without associated LOC up to several times a month for the past year. - -MEDS: procardia SR, Lasix, Ecotrin, KCL, Digoxin, Colace, Coumadin. - -PMH: 1)8/92 evaluation for presyncope (Echocardiogram showed: AV fibrosis/calcification, AV stenosis/insufficiency, MV stenosis with annular calcification and regurgitation, moderate TR, Decreased LV systolic function, severe LAE. MRI brain: focal areas of increased T2 signal in the left cerebellum and in the brainstem probably representing microvascular ischemic disease. IVG (MUGA scan)revealed: global hypokinesis of the LV and biventricular dysfunction, RV ejection Fx 45% and LV ejection Fx 39%. He was subsequently placed on coumadin severe valvular heart disease), 2)HTN, 3)Rheumatic fever and heart disease, 4)COPD, 5)ETOH abuse, 6)colonic polyps, 7)CAD, 8)CHF, 9)Appendectomy, 10)Junctional tachycardia. -""" # noqa: E501 - -article_footer = """ -## Disclaimer -This software is intended solely for the testing purposes and non-commercial use. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED. - -contact@cogstack.com for more information. - -Please note this is a limited version of MedCAT and it is not trained or validated by clinicans. -""" # noqa: E501 - -io = gr.Interface( - fn=perform_named_entity_resolution, - inputs="text", - outputs=[ - gr.HighlightedText(label="Processed Text"), - gr.Dataframe(label="Annotations", headers=headers, interactive=False), - ], - examples=[short_example, long_example], - preload_example=0, - title="MedCAT Demo", - description="Enter some text and click Annotate.", - flagging_mode="never", - article=article_footer, -) + theme = gr.themes.Default(primary_hue="blue", secondary_hue="teal") + gr.mount_gradio_app(app, io, path=path, theme=theme, css=highlighted_text_css) diff --git a/medcat-service/medcat_service/main.py b/medcat-service/medcat_service/main.py index 21f843b2a..1a260f000 100644 --- a/medcat-service/medcat_service/main.py +++ b/medcat-service/medcat_service/main.py @@ -3,12 +3,11 @@ import logging import logging.config -import gradio as gr from fastapi import FastAPI, Request from fastapi.responses import JSONResponse from medcat_service.config import Settings -from medcat_service.demo.gradio_demo import io +from medcat_service.demo.gradio_demo import mount_gradio_app from medcat_service.dependencies import get_settings from medcat_service.log_config import log_config from medcat_service.routers import admin, health, process @@ -37,7 +36,7 @@ app.include_router(health.router) app.include_router(process.router) -gr.mount_gradio_app(app, io, path="/demo", mcp_server=True) +mount_gradio_app(app, path="/demo") def configure_observability(settings: Settings, app: FastAPI): diff --git a/medcat-service/medcat_service/test/demo/__init__.py b/medcat-service/medcat_service/test/demo/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/medcat-service/medcat_service/test/demo/test_demo_logic.py b/medcat-service/medcat_service/test/demo/test_demo_logic.py new file mode 100644 index 000000000..f56fb677d --- /dev/null +++ b/medcat-service/medcat_service/test/demo/test_demo_logic.py @@ -0,0 +1,293 @@ +""" +Unit tests for demo logic functions, specifically perform_named_entity_resolution. +""" +import json +import unittest +from unittest.mock import MagicMock, patch + +from medcat_service.config import Settings +from medcat_service.demo.demo_logic import EntityResponse, perform_named_entity_resolution +from medcat_service.nlp_processor import MedCatProcessor +from medcat_service.test.common import ( + get_example_long_document, + get_example_short_document, + setup_medcat_processor, +) + + +class TestDemoLogic(unittest.TestCase): + """ + Test cases for demo logic functions. + """ + + processor: MedCatProcessor + + @classmethod + def setUpClass(cls): + """Set up test fixtures once before all test methods.""" + setup_medcat_processor() + cls.processor = MedCatProcessor(Settings()) + + def setUp(self): + """Set up test fixtures before each test method.""" + self.test_text = get_example_short_document() + + @patch("medcat_service.demo.demo_logic.get_settings") + @patch("medcat_service.demo.demo_logic.get_medcat_processor") + def test_perform_named_entity_resolution_with_valid_text(self, mock_get_processor, mock_get_settings): + """Test perform_named_entity_resolution with valid input text.""" + # Setup mocks + mock_get_settings.return_value = Settings() + mock_get_processor.return_value = TestDemoLogic.processor + + # Execute + result_dict, result_table = perform_named_entity_resolution(self.test_text) + + # Assert + self.assertIsNotNone(result_dict) + self.assertIsNotNone(result_table) + assert result_dict is not None # Type narrowing for type checker + assert result_table is not None # Type narrowing for type checker + self.assertIn("text", result_dict) + self.assertIn("entities", result_dict) + self.assertEqual(result_dict["text"], self.test_text) + self.assertIsInstance(result_dict["entities"], list) + self.assertIsInstance(result_table, list) + + @patch("medcat_service.demo.demo_logic.get_settings") + @patch("medcat_service.demo.demo_logic.get_medcat_processor") + def test_perform_named_entity_resolution_with_empty_string(self, mock_get_processor, mock_get_settings): + """Test perform_named_entity_resolution with empty string.""" + # Setup mocks + mock_get_settings.return_value = Settings() + mock_get_processor.return_value = TestDemoLogic.processor + + # Execute + result_dict, result_table = perform_named_entity_resolution("") + + # Assert + self.assertIsNone(result_dict) + self.assertIsNone(result_table) + + @patch("medcat_service.demo.demo_logic.get_settings") + @patch("medcat_service.demo.demo_logic.get_medcat_processor") + def test_perform_named_entity_resolution_with_whitespace_only(self, mock_get_processor, mock_get_settings): + """Test perform_named_entity_resolution with whitespace-only string.""" + # Setup mocks + mock_get_settings.return_value = Settings() + mock_get_processor.return_value = TestDemoLogic.processor + + # Execute + result_dict, result_table = perform_named_entity_resolution(" \n\t ") + + # Assert + self.assertIsNone(result_dict) + self.assertIsNone(result_table) + + @patch("medcat_service.demo.demo_logic.get_settings") + @patch("medcat_service.demo.demo_logic.get_medcat_processor") + def test_perform_named_entity_resolution_response_structure(self, mock_get_processor, mock_get_settings): + """Test that the response has the correct structure.""" + # Setup mocks + mock_get_settings.return_value = Settings() + mock_get_processor.return_value = TestDemoLogic.processor + + # Execute + result_dict, result_table = perform_named_entity_resolution(self.test_text) + + # Assert structure + self.assertIsNotNone(result_dict) + assert result_dict is not None # Type narrowing for type checker + self.assertIn("text", result_dict) + self.assertIn("entities", result_dict) + self.assertEqual(result_dict["text"], self.test_text) + + # Check entity structure if entities exist + if result_dict["entities"]: + entity = result_dict["entities"][0] + self.assertIn("entity", entity) + self.assertIn("score", entity) + self.assertIn("index", entity) + self.assertIn("word", entity) + self.assertIn("start", entity) + self.assertIn("end", entity) + + @patch("medcat_service.demo.demo_logic.get_settings") + @patch("medcat_service.demo.demo_logic.get_medcat_processor") + def test_perform_named_entity_resolution_table_format(self, mock_get_processor, mock_get_settings): + """Test that the table format is correct.""" + # Setup mocks + mock_get_settings.return_value = Settings() + mock_get_processor.return_value = TestDemoLogic.processor + + # Execute + result_dict, result_table = perform_named_entity_resolution(self.test_text) + + # Assert table structure + self.assertIsNotNone(result_table) + self.assertIsInstance(result_table, list) + # If there are annotations, check the structure + if result_table: + self.assertIsInstance(result_table[0], list) + # Should have 6 columns based on headers + if result_table[0]: + self.assertEqual(len(result_table[0]), 6) + + @patch("medcat_service.demo.demo_logic.get_settings") + @patch("medcat_service.demo.demo_logic.get_medcat_processor") + def test_perform_named_entity_resolution_with_long_text(self, mock_get_processor, mock_get_settings): + """Test perform_named_entity_resolution with longer text.""" + # Setup mocks + mock_get_settings.return_value = Settings() + mock_get_processor.return_value = TestDemoLogic.processor + + long_text = get_example_long_document() + + # Execute + result_dict, result_table = perform_named_entity_resolution(long_text) + + # Assert + self.assertIsNotNone(result_dict) + self.assertIsNotNone(result_table) + assert result_dict is not None # Type narrowing for type checker + self.assertEqual(result_dict["text"], long_text) + + @patch("medcat_service.demo.demo_logic.get_settings") + @patch("medcat_service.demo.demo_logic.get_medcat_processor") + def test_perform_named_entity_resolution_returns_entity_response_format( + self, mock_get_processor, mock_get_settings + ): + """Test that the result can be validated as EntityResponse format.""" + # Setup mocks + mock_get_settings.return_value = Settings() + mock_get_processor.return_value = TestDemoLogic.processor + + # Execute + result_dict, result_table = perform_named_entity_resolution(self.test_text) + + # Assert - validate the dict can be converted to EntityResponse + self.assertIsNotNone(result_dict) + assert result_dict is not None # Type narrowing for type checker + try: + response = EntityResponse(**result_dict) + self.assertEqual(response.text, self.test_text) + self.assertIsInstance(response.entities, list) + except Exception as e: + self.fail(f"Result dict should be valid EntityResponse format: {e}") + + @patch("medcat_service.demo.demo_logic.get_settings") + @patch("medcat_service.demo.demo_logic.get_medcat_processor") + def test_perform_named_entity_resolution_with_mocked_get_entities( + self, mock_get_processor, mock_get_settings + ): + """Test perform_named_entity_resolution with mocked get_entities returning JSON data.""" + # Mock entities data inline as JSON string + mock_annotations_json = """ + { + "annotations": [ + { + "1": { + "pretty_name": "Cerebral Hemorrhage", + "cui": "C2937358", + "type_ids": [ + "T046" + ], + "source_value": "Intracerebral hemorrhage", + "detected_name": "intracerebral~hemorrhage", + "acc": 1, + "context_similarity": 1, + "start": 13, + "end": 37, + "id": 1, + "meta_anns": { + "Status": { + "value": "Affirmed", + "confidence": 0.9999077320098877, + "name": "Status" + } + }, + "context_left": [], + "context_center": [], + "context_right": [], + "icd10": [ + { + "chapter": "I61", + "name": "Intracerebral haemorrhage" + }, + { + "chapter": "I61.9", + "name": "Intracerebral haemorrhage, unspecified" + } + ], + "snomed": [ + "S-1508000", + "S-155389003", + "S-155391006", + "S-155394003", + "S-195163003", + "S-195173001", + "S-266313001", + "S-274100004" + ] + } + } + ] + } + """ + mock_annotations_data = json.loads(mock_annotations_json) + + # Create a mock processor + mock_processor = MagicMock(spec=MedCatProcessor) + + # Mock process_content to return a ProcessResult with the expected structure + from medcat_service.types import ProcessResult + + mock_process_result = ProcessResult( + text=self.test_text, + annotations=mock_annotations_data["annotations"], + success=True, + timestamp="2024-01-01T00:00:00Z", + elapsed_time=0.1, + ) + mock_processor.process_content.return_value = mock_process_result + + # Setup mocks + mock_get_settings.return_value = Settings() + mock_get_processor.return_value = mock_processor + + # Expected result as JSON string for readability + expected = json.dumps( + { + "text": self.test_text, + "entities": [ + { + "entity": "C2937358", + "score": 1.0, + "index": 1, + "word": "intracerebral~hemorrhage", + "start": 13, + "end": 37, + } + ], + }, + indent=2, + sort_keys=True, + ) + + # Execute + actual_dict, result_table = perform_named_entity_resolution(self.test_text) + + # Assert + self.assertIsNotNone(actual_dict) + assert actual_dict is not None # Type narrowing for type checker + actual = json.dumps(actual_dict, indent=2, sort_keys=True) + self.assertEqual(expected, actual) + + # Verify process_content was called with correct input + mock_processor.process_content.assert_called_once() + call_args = mock_processor.process_content.call_args[0][0] + self.assertEqual(call_args["text"], self.test_text) + + +if __name__ == "__main__": + unittest.main() diff --git a/medcat-service/requirements-dev.txt b/medcat-service/requirements-dev.txt new file mode 100644 index 000000000..c4f4d8c64 --- /dev/null +++ b/medcat-service/requirements-dev.txt @@ -0,0 +1 @@ +uvicorn-hmr[all] diff --git a/medcat-service/requirements.txt b/medcat-service/requirements.txt index 27b723e0d..cf81d21c5 100644 --- a/medcat-service/requirements.txt +++ b/medcat-service/requirements.txt @@ -6,10 +6,10 @@ medcat[meta-cat,spacy,deid]~=2.2.0 # pinned because of issues with de-id models and past models (it will not do any de-id) transformers>=4.34.0,<5.0.0 requests==2.32.4 -fastapi[standard]==0.115.2 +fastapi[standard]==0.128.0 pydantic>=2.11.10,<2.12.5 pydantic-settings==2.10.1 -gradio[mcp]==5.38.0 +gradio[mcp]==6.2.0 prometheus-fastapi-instrumentator==7.1.0 opentelemetry-distro[otlp]==0.60b0 opentelemetry-instrumentation==0.60b0 diff --git a/medcat-service/start_service_debug.sh b/medcat-service/start_service_debug.sh index c0055ed09..798d66d3f 100644 --- a/medcat-service/start_service_debug.sh +++ b/medcat-service/start_service_debug.sh @@ -2,9 +2,9 @@ echo "Starting MedCAT Service" # Optional - Enable DeID mode with: -#export APP_MEDCAT_MODEL_PACK="models/examples/example-deid-model-pack.zip" -#export DEID_MODE=True -#export DEID_REDACT=True +# export APP_MEDCAT_MODEL_PACK="models/examples/example-deid-model-pack.zip" +# export DEID_MODE=True +# export DEID_REDACT=True if [ -z "${APP_MODEL_CDB_PATH}" ] && [ -z "${APP_MODEL_VOCAB_PATH}" ] && [ -z "${APP_MEDCAT_MODEL_PACK}" ]; then export APP_MEDCAT_MODEL_PACK="models/examples/example-medcat-v2-model-pack.zip" @@ -13,4 +13,11 @@ fi export APP_ENABLE_METRICS=${APP_ENABLE_METRICS:-True} -fastapi dev medcat_service/main.py +if [ "${HOT_MODULE_RELOADING}" = "True" ]; then + # Experimental: Hot module reloading. Need to `pip install -r requirements-dev.txt` + echo "Running medcat-service with hot module reloading" + uvicorn-hmr medcat_service/main:app --refresh --reload-include 'medcat_service' +else + fastapi dev medcat_service/main.py +fi +