def cohort_expression_from_yaml(yaml_str: str) -> CohortExpression:
    """Load a cohort expression from a YAML string.

    The text is parsed with ``yaml.safe_load`` and the resulting mapping is
    handed to ``snake_case_dict_to_cohort_expression``. A document that
    parses to ``None`` (for example an empty string) is treated as an empty
    mapping.

    Args:
        yaml_str: YAML string containing the cohort definition with
            snake_case field names.

    Returns:
        CohortExpression instance.

    Raises:
        ValueError: If the text cannot be parsed as YAML, if the top level
            of the document is not a mapping, or if the mapping does not
            conform to the schema.

    Example:
        >>> yaml_str = '''
        ... title: "My Cohort"
        ... concept_sets: []
        ... primary_criteria:
        ...   criteria_list: []
        ... '''
        >>> expression = cohort_expression_from_yaml(yaml_str)  # doctest: +SKIP
    """
    # Imported lazily so that importing this module does not require PyYAML.
    import yaml

    # Broad catch on purpose: this is the API boundary and callers are
    # promised a ValueError for any unusable input.
    try:
        data = yaml.safe_load(yaml_str)
    except Exception as e:
        raise ValueError(f"Invalid cohort expression YAML: {e}") from e

    if data is None:
        data = {}

    # safe_load happily returns scalars and lists (e.g. when a file path is
    # passed instead of file content); reject those with a clear message
    # instead of letting schema validation fail obscurely.
    if not isinstance(data, dict):
        raise ValueError(
            "Invalid cohort expression YAML: expected a mapping at the top level, "
            f"got {type(data).__name__}"
        )

    try:
        return snake_case_dict_to_cohort_expression(data)
    except Exception as e:
        raise ValueError(f"Invalid cohort expression YAML: {e}") from e
md_parser.add_argument("--output", "-o", help="Output Markdown file (default: stdout)") md_parser.add_argument("--no-validate", action="store_true", help="Skip validation") md_parser.add_argument("--title", "-t", type=str, help="Title to add to markdown document") @@ -56,12 +57,12 @@ def main(): source_parser = subparsers.add_parser( "generate-source", help="Generate Python source code from cohort definition" ) - source_parser.add_argument("input", help="Input JSON file") + source_parser.add_argument("input", help="Input JSON or YAML file") source_parser.add_argument("--output", "-o", help="Output Python file (default: stdout)") # Process command (all-in-one) process_parser = subparsers.add_parser("process", help="Validate, generate SQL and Markdown") - process_parser.add_argument("input", help="Input JSON file") + process_parser.add_argument("input", help="Input JSON or YAML file") process_parser.add_argument("--sql-output", help="SQL output file") process_parser.add_argument("--md-output", help="Markdown output file") process_parser.add_argument("--cdm-schema", default="@cdm_database_schema", help="CDM schema name") @@ -101,11 +102,8 @@ def main(): def validate_command(args): """Validate a cohort definition.""" - # Read JSON - json_str = Path(args.input).read_text() - - # Load and validate - expression = cohort_expression_from_json(json_str) + # Load expression (auto-detects JSON or YAML) + expression = load_expression(Path(args.input)) # Run validation checks warnings = expression.check() @@ -131,11 +129,8 @@ def validate_command(args): def generate_sql_command(args): """Generate SQL from cohort definition.""" - # Read JSON - json_str = Path(args.input).read_text() - - # Load expression - expression = cohort_expression_from_json(json_str) + # Load expression (auto-detects JSON or YAML) + expression = load_expression(Path(args.input)) # Validate if requested if not args.no_validate: @@ -166,11 +161,8 @@ def generate_sql_command(args): def 
# Compiled once; both patterns are applied to every key that is converted.
# An upper-case letter followed by lower-case letters starts a new word,
# unless it already follows an underscore.
_WORD_START = re.compile(r"([^_])([A-Z][a-z]+)")
# A lower-case letter or digit directly followed by an upper-case letter.
_CASE_CHANGE = re.compile(r"([a-z0-9])([A-Z])")


def to_snake_case(name: str) -> str:
    """Convert camelCase or PascalCase string to snake_case.

    Names that are already snake_case (or UPPER_SNAKE_CASE) come back simply
    lower-cased; an existing underscore is never doubled.

    Args:
        name: String in camelCase or PascalCase format

    Returns:
        String in snake_case format
    """
    # Split "XyzAbc" / "HTTPResponse" style word starts first ...
    partly_split = _WORD_START.sub(r"\1_\2", name)
    # ... then any remaining lower/digit -> upper transitions.
    return _CASE_CHANGE.sub(r"\1_\2", partly_split).lower()


def to_pascal_case(name: str) -> str:
    """Convert snake_case string to PascalCase.

    Only the first character of each underscore-separated component is
    upper-cased; the rest of the component is left untouched, so a name that
    is already PascalCase is returned unchanged.

    Args:
        name: String in snake_case format

    Returns:
        String in PascalCase format
    """
    # str.title() is deliberately avoided: it lower-cases the remainder of
    # each word and also capitalises letters that follow digits.
    return "".join(part[:1].upper() + part[1:] for part in name.split("_"))


def dict_to_snake_case(data: Any) -> Any:
    """Recursively convert all dict keys from PascalCase/camelCase to snake_case.

    Args:
        data: Dictionary, list, or primitive value

    Returns:
        Same structure with all string dict keys converted to snake_case.
        Keys that are not strings are kept as they are.
    """
    if isinstance(data, dict):
        return {
            (to_snake_case(key) if isinstance(key, str) else key): dict_to_snake_case(value)
            for key, value in data.items()
        }
    if isinstance(data, list):
        return [dict_to_snake_case(item) for item in data]
    return data


def dict_to_pascal_case(data: Any) -> Any:
    """Recursively convert all dict keys from snake_case to PascalCase.

    Args:
        data: Dictionary, list, or primitive value

    Returns:
        Same structure with all string dict keys converted to PascalCase.
        Keys that are not strings are kept as they are.
    """
    if isinstance(data, dict):
        return {
            (to_pascal_case(key) if isinstance(key, str) else key): dict_to_pascal_case(value)
            for key, value in data.items()
        }
    if isinstance(data, list):
        return [dict_to_pascal_case(item) for item in data]
    return data
def cohort_expression_to_snake_case(expr: CohortExpression) -> dict[str, Any]:
    """Convert CohortExpression to dict with snake_case field names.

    Args:
        expr: CohortExpression instance

    Returns:
        Dictionary representation with all keys in snake_case
    """
    # Dump using the serialization aliases, then normalise every key.
    # NOTE(review): model_dump() is called in its default mode, so any
    # non-primitive values the models hold are passed through unchanged —
    # confirm that is acceptable for the YAML writer that consumes this.
    aliased = expr.model_dump(by_alias=True)
    return dict_to_snake_case(aliased)


def snake_case_dict_to_cohort_expression(data: dict[str, Any]) -> CohortExpression:
    """Convert snake_case dict to CohortExpression.

    The mapping is validated as given first. Only if that fails are its keys
    converted to PascalCase and validation retried. When both attempts fail,
    the error from the first attempt is raised, because it describes the
    problem in terms of the keys the caller actually supplied.

    Args:
        data: Dictionary with snake_case keys

    Returns:
        CohortExpression instance

    Raises:
        Exception: Whatever ``CohortExpression.model_validate`` raised for
            the unconverted input, if the PascalCase retry fails as well.
    """
    # NOTE(review): the direct attempt relies on the models accepting
    # snake_case field names (populate_by_name) — not verifiable from here.
    try:
        return CohortExpression.model_validate(data)
    except Exception as exc:
        # Keep a reference: the "as" name is cleared when the handler ends.
        direct_error = exc

    try:
        return CohortExpression.model_validate(dict_to_pascal_case(data))
    except Exception:
        # The fallback error talks about converted key names the caller never
        # wrote; surface the original failure instead.
        raise direct_error from None
def save_expression_as_yaml(expr: CohortExpression, path: Union[str, Path]) -> None:
    """Save a CohortExpression as a YAML file with snake_case field names.

    The expression is serialised to text first and written afterwards, so a
    serialisation failure does not leave a truncated file behind.

    Args:
        expr: CohortExpression instance to save
        path: File path to save the YAML file to
    """
    # Imported lazily so that importing this module does not require PyYAML.
    import yaml

    # NOTE(review): yaml.dump (not safe_dump) will emit Python-specific tags
    # for values that are not plain YAML types, and yaml.safe_load cannot read
    # those back — confirm the dumped dict only ever holds primitives.
    document = yaml.dump(
        cohort_expression_to_snake_case(expr),
        default_flow_style=False,  # block style, one key per line
        sort_keys=False,  # keep the field order produced by the model
        allow_unicode=True,
    )
    Path(path).write_text(document, encoding="utf-8")
+ +This example shows: +1. Loading a cohort from a YAML file +2. Creating a cohort programmatically and saving as YAML +3. Working with YAML cohorts in the same way as JSON cohorts +""" + +from pathlib import Path +from tempfile import TemporaryDirectory + +from circe.api import build_cohort_query, cohort_expression_from_yaml, cohort_print_friendly +from circe.cohortdefinition import BuildExpressionQueryOptions +from circe.io import load_expression + + +def example_1_load_yaml_cohort(): + """Example 1: Load a cohort from YAML file.""" + print("=" * 60) + print("Example 1: Loading a YAML Cohort") + print("=" * 60) + + # Load a YAML cohort file + # The file uses snake_case naming convention, which is more Pythonic + cohort_path = Path(__file__).parent.parent / "tests" / "cohorts" / "isolated_immune_thrombocytopenia.yaml" + + if cohort_path.exists(): + # Method 1: Using load_expression (auto-detects YAML by extension) + cohort = load_expression(cohort_path) + print(f"✓ Loaded YAML cohort: {cohort.title}") + print(f" Concept sets: {len(cohort.concept_sets) if cohort.concept_sets else 0}") + + # Method 2: Directly from YAML string + yaml_content = cohort_path.read_text() + cohort_expression_from_yaml(yaml_content) + print("✓ Also loaded via cohort_expression_from_yaml()") + + return cohort + else: + print(f"✗ Example YAML file not found at {cohort_path}") + print(" Creating a simple YAML cohort instead...") + return None + + +def example_2_create_and_save_yaml(): + """Example 2: Create a cohort programmatically and save as YAML.""" + print("\n" + "=" * 60) + print("Example 2: Creating and Saving a YAML Cohort") + print("=" * 60) + + # Create YAML content with snake_case names + yaml_content = """ +title: "Hypertension Patients" +cdm_version_range: ">=5.0.0" + +concept_sets: + - id: 1 + name: "Hypertension diagnosis" + expression: + items: + - concept: + concept_id: 316866 + concept_name: "Essential hypertension" + domain_id: "Condition" + vocabulary_id: "SNOMED" + 
concept_class_id: "Clinical Finding" + standard_concept: "S" + is_excluded: false + include_descendants: true + include_mapped: false + is_excluded: false + include_descendants: false + include_mapped: false + +primary_criteria: + criteria_list: + - condition_occurrence: + codeset_id: 1 + condition_type_exclude: false + observation_window: + prior_days: 0 + post_days: 0 + primary_criteria_limit: + type: "All" + +inclusion_rules: [] +""" + + # Parse and save to a file + with TemporaryDirectory() as tmpdir: + yaml_path = Path(tmpdir) / "hypertension_cohort.yaml" + yaml_path.write_text(yaml_content) + print(f"✓ Created YAML cohort at {yaml_path}") + + # Load it back to verify + cohort = load_expression(yaml_path) + print(f"✓ Loaded cohort: '{cohort.title}'") + print(f" Concept sets: {len(cohort.concept_sets) if cohort.concept_sets else 0}") + + # Read back and show snake_case naming + loaded_yaml = yaml_path.read_text() + print("\n✓ YAML file uses snake_case naming:") + for line in loaded_yaml.split("\n")[:15]: + if line.strip() and not line.strip().startswith("#"): + print(f" {line}") + + return cohort, yaml_path + + +def example_3_yaml_sql_generation(): + """Example 3: Generate SQL from a YAML cohort.""" + print("\n" + "=" * 60) + print("Example 3: Generate SQL from YAML Cohort") + print("=" * 60) + + # Create a YAML cohort with proper primary_criteria + yaml_content = """ +title: "Simple Test Cohort" +concept_sets: [] +primary_criteria: + criteria_list: [] + observation_window: + prior_days: 0 + post_days: 0 + primary_criteria_limit: + type: "All" +""" + + with TemporaryDirectory() as tmpdir: + yaml_path = Path(tmpdir) / "test_cohort.yaml" + yaml_path.write_text(yaml_content) + + # Load YAML cohort + cohort = load_expression(yaml_path) + + # Generate SQL (same as with JSON cohorts) + options = BuildExpressionQueryOptions() + options.cdm_schema = "cdm" + options.target_table = "public.cohort" + options.cohort_id = 1 + + sql = build_cohort_query(cohort, options) + 
print("✓ Generated SQL from YAML cohort") + print("\nSQL Preview (first 20 lines):") + print("-" * 60) + lines = sql.split("\n") + for line in lines[:20]: + print(line) + if len(lines) > 20: + print("... (truncated)") + + +def example_4_yaml_markdown_generation(): + """Example 4: Generate Markdown from a YAML cohort.""" + print("\n" + "=" * 60) + print("Example 4: Generate Markdown from YAML Cohort") + print("=" * 60) + + yaml_content = """ +title: "Drug Allergy Cohort" +concept_sets: + - id: 1 + name: "Penicillin allergy" + expression: + items: [] + is_excluded: false + include_descendants: false + include_mapped: false +primary_criteria: null +""" + + with TemporaryDirectory() as tmpdir: + yaml_path = Path(tmpdir) / "allergy_cohort.yaml" + yaml_path.write_text(yaml_content) + + # Load YAML cohort + cohort = load_expression(yaml_path) + + # Generate Markdown (same as with JSON cohorts) + markdown = cohort_print_friendly(cohort, include_concept_sets=True, title="YAML Cohort Example") + + print("✓ Generated Markdown from YAML cohort") + print("\nMarkdown Preview (first 30 lines):") + print("-" * 60) + lines = markdown.split("\n") + for line in lines[:30]: + print(line) + if len(lines) > 30: + print("... 
(truncated)") + + +def example_5_yaml_vs_json(): + """Example 5: Compare YAML and JSON formats.""" + print("\n" + "=" * 60) + print("Example 5: YAML vs JSON Format Comparison") + print("=" * 60) + + print("\nJSON Format (PascalCase):") + print("-" * 40) + print(" - Uses PascalCase field names: conceptSets, primaryCriteria, etc.") + print(" - More compact representation") + print(" - Compatible with Java/R CIRCE implementations") + print("\nYAML Format (snake_case):") + print("-" * 40) + print(" - Uses snake_case field names: concept_sets, primary_criteria, etc.") + print(" - More readable for Python developers") + print(" - Better matches Python naming conventions") + print("\nBoth formats are supported and interchangeable in circepy!") + + +def main(): + """Run all examples.""" + print("\n") + print("╔" + "=" * 58 + "╗") + print("║" + " " * 58 + "║") + print("║" + " YAML Cohort Support Examples in circepy".center(58) + "║") + print("║" + " " * 58 + "║") + print("╚" + "=" * 58 + "╝") + + example_1_load_yaml_cohort() + example_2_create_and_save_yaml() + example_3_yaml_sql_generation() + example_4_yaml_markdown_generation() + example_5_yaml_vs_json() + + print("\n" + "=" * 60) + print("All examples completed!") + print("=" * 60 + "\n") + + +if __name__ == "__main__": + main() diff --git a/pyproject.toml b/pyproject.toml index d78d6fa6..4f0b2a09 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,7 +36,8 @@ requires-python = ">=3.9" dependencies = [ "pydantic>=2.0.0", "typing-extensions>=4.0.0", - "jinja2>=3.1.0" + "jinja2>=3.1.0", + "PyYAML>=6.0" ] [project.optional-dependencies] diff --git a/tests/cohorts/isolated_immune_thrombocytopenia.yaml b/tests/cohorts/isolated_immune_thrombocytopenia.yaml new file mode 100644 index 00000000..e5dec722 --- /dev/null +++ b/tests/cohorts/isolated_immune_thrombocytopenia.yaml @@ -0,0 +1,1409 @@ +cdm_version_range: '>=5.0.0' +primary_criteria: + criteria_list: + - condition_occurrence: + codeset_id: 30 + condition_type_exclude: 
false + observation_window: + prior_days: 0 + post_days: 0 + primary_criteria_limit: + type: All +concept_sets: +- id: 7 + name: Platelet measurement + expression: + items: + - concept: + concept_id: 4267147 + concept_name: Platelet count + standard_concept: S + standard_concept_caption: Standard + invalid_reason: V + invalid_reason_caption: Valid + concept_code: '61928009' + domain_id: Measurement + vocabulary_id: SNOMED + concept_class_id: Procedure + is_excluded: false + include_descendants: true + include_mapped: false + - concept: + concept_id: 3031586 + concept_name: Platelets [#/volume] in Blood by Estimate + standard_concept: S + standard_concept_caption: Standard + invalid_reason: V + invalid_reason_caption: Valid + concept_code: 49497-1 + domain_id: Measurement + vocabulary_id: LOINC + concept_class_id: Lab Test + is_excluded: false + include_descendants: true + include_mapped: false + - concept: + concept_id: 3050583 + concept_name: Platelets panel - Blood by Automated count + standard_concept: S + standard_concept_caption: Standard + invalid_reason: V + invalid_reason_caption: Valid + concept_code: 53800-9 + domain_id: Measurement + vocabulary_id: LOINC + concept_class_id: Lab Test + is_excluded: false + include_descendants: true + include_mapped: false + - concept: + concept_id: 3007461 + concept_name: Platelets [#/volume] in Blood + standard_concept: S + standard_concept_caption: Standard + invalid_reason: V + invalid_reason_caption: Valid + concept_code: 26515-7 + domain_id: Measurement + vocabulary_id: LOINC + concept_class_id: Lab Test + is_excluded: false + include_descendants: true + include_mapped: false + - concept: + concept_id: 37393863 + concept_name: Platelet count + standard_concept: S + standard_concept_caption: Standard + invalid_reason: V + invalid_reason_caption: Valid + concept_code: '1022651000000100' + domain_id: Measurement + vocabulary_id: SNOMED + concept_class_id: Observable Entity + is_excluded: false + include_descendants: 
true + include_mapped: false +- id: 9 + name: Congenital or genetic causes for thrombocytopenia + expression: + items: + - concept: + concept_id: 37397537 + concept_name: Beta thalassemia X-linked thrombocytopenia syndrome + standard_concept: S + standard_concept_caption: Standard + invalid_reason: V + invalid_reason_caption: Valid + concept_code: '718196002' + domain_id: Condition + vocabulary_id: SNOMED + concept_class_id: Clinical Finding + is_excluded: false + include_descendants: true + include_mapped: false + - concept: + concept_id: 4121131 + concept_name: Inherited platelet disorder + standard_concept: S + standard_concept_caption: Standard + invalid_reason: V + invalid_reason_caption: Valid + concept_code: '234469001' + domain_id: Condition + vocabulary_id: SNOMED + concept_class_id: Clinical Finding + is_excluded: false + include_descendants: true + include_mapped: false + - concept: + concept_id: 4006469 + concept_name: Reticular dysgenesis + standard_concept: S + standard_concept_caption: Standard + invalid_reason: V + invalid_reason_caption: Valid + concept_code: '111584000' + domain_id: Condition + vocabulary_id: SNOMED + concept_class_id: Clinical Finding + is_excluded: false + include_descendants: true + include_mapped: false + - concept: + concept_id: 42537688 + concept_name: Congenital thrombocytopenia + standard_concept: S + standard_concept_caption: Standard + invalid_reason: V + invalid_reason_caption: Valid + concept_code: '737221003' + domain_id: Condition + vocabulary_id: SNOMED + concept_class_id: Clinical Finding + is_excluded: false + include_descendants: true + include_mapped: false + - concept: + concept_id: 437242 + concept_name: Congenital thrombocytopenic purpura + standard_concept: S + standard_concept_caption: Standard + invalid_reason: V + invalid_reason_caption: Valid + concept_code: '267535004' + domain_id: Condition + vocabulary_id: SNOMED + concept_class_id: Clinical Finding + is_excluded: false + include_descendants: true + 
include_mapped: false +- id: 10 + name: Thrombocytosis + expression: + items: + - concept: + concept_id: 4280071 + concept_name: Thrombocytosis + standard_concept: S + standard_concept_caption: Standard + invalid_reason: V + invalid_reason_caption: Valid + concept_code: '6631009' + domain_id: Condition + vocabulary_id: SNOMED + concept_class_id: Clinical Finding + is_excluded: false + include_descendants: true + include_mapped: false + - concept: + concept_id: 36715584 + concept_name: Refractory anemia with ringed sideroblasts associated with marked + thrombocytosis + standard_concept: S + standard_concept_caption: Standard + invalid_reason: V + invalid_reason_caption: Valid + concept_code: '721302006' + domain_id: Condition + vocabulary_id: SNOMED + concept_class_id: Clinical Finding + is_excluded: false + include_descendants: true + include_mapped: false + - concept: + concept_id: 45766614 + concept_name: Refractory anemia with ring sideroblasts associated with marked + thrombocytosis + standard_concept: S + standard_concept_caption: Standard + invalid_reason: V + invalid_reason_caption: Valid + concept_code: '703817002' + domain_id: Observation + vocabulary_id: SNOMED + concept_class_id: Morph Abnormality + is_excluded: false + include_descendants: true + include_mapped: false +- id: 24 + name: Pancytopenia & bone marrow disorder + expression: + items: + - concept: + concept_id: 432881 + concept_name: Pancytopenia + standard_concept: S + standard_concept_caption: Standard + invalid_reason: V + invalid_reason_caption: Valid + concept_code: '127034005' + domain_id: Condition + vocabulary_id: SNOMED + concept_class_id: Clinical Finding + is_excluded: false + include_descendants: true + include_mapped: false + - concept: + concept_id: 4131124 + concept_name: Bone marrow disorder + standard_concept: S + standard_concept_caption: Standard + invalid_reason: V + invalid_reason_caption: Valid + concept_code: '127035006' + domain_id: Condition + vocabulary_id: SNOMED + 
concept_class_id: Clinical Finding + is_excluded: false + include_descendants: true + include_mapped: false +- id: 25 + name: Neutropenia, Agranulocytosis or Unspecified Leukopenia + expression: + items: + - concept: + concept_id: 36715585 + concept_name: Refractory neutropenia + standard_concept: S + standard_concept_caption: Standard + invalid_reason: V + invalid_reason_caption: Valid + concept_code: '721303001' + domain_id: Condition + vocabulary_id: SNOMED + concept_class_id: Clinical Finding + is_excluded: false + include_descendants: true + include_mapped: false + - concept: + concept_id: 320073 + concept_name: Neutropenia + standard_concept: S + standard_concept_caption: Standard + invalid_reason: V + invalid_reason_caption: Valid + concept_code: '165517008' + domain_id: Condition + vocabulary_id: SNOMED + concept_class_id: Clinical Finding + is_excluded: false + include_descendants: true + include_mapped: false + - concept: + concept_id: 42872951 + concept_name: Refractory neutropenia + standard_concept: S + standard_concept_caption: Standard + invalid_reason: V + invalid_reason_caption: Valid + concept_code: '450946009' + domain_id: Observation + vocabulary_id: SNOMED + concept_class_id: Morph Abnormality + is_excluded: false + include_descendants: true + include_mapped: false + - concept: + concept_id: 435224 + concept_name: Leukopenia + standard_concept: S + standard_concept_caption: Standard + invalid_reason: V + invalid_reason_caption: Valid + concept_code: '84828003' + domain_id: Condition + vocabulary_id: SNOMED + concept_class_id: Clinical Finding + is_excluded: false + include_descendants: false + include_mapped: false + - concept: + concept_id: 440689 + concept_name: Agranulocytosis + standard_concept: S + standard_concept_caption: Standard + invalid_reason: V + invalid_reason_caption: Valid + concept_code: '17182001' + domain_id: Condition + vocabulary_id: SNOMED + concept_class_id: Clinical Finding + is_excluded: false + include_descendants: 
true + include_mapped: false + - concept: + concept_id: 45766061 + concept_name: Periodontitis associated with chronic familial neutropenia + standard_concept: S + standard_concept_caption: Standard + invalid_reason: V + invalid_reason_caption: Valid + concept_code: '703148008' + domain_id: Condition + vocabulary_id: SNOMED + concept_class_id: Clinical Finding + is_excluded: false + include_descendants: true + include_mapped: false + - concept: + concept_id: 4119158 + concept_name: Neutropenic disorder + standard_concept: S + standard_concept_caption: Standard + invalid_reason: V + invalid_reason_caption: Valid + concept_code: '303011007' + domain_id: Condition + vocabulary_id: SNOMED + concept_class_id: Clinical Finding + is_excluded: false + include_descendants: true + include_mapped: false +- id: 26 + name: Neutrophil Absolute Count + expression: + items: + - concept: + concept_id: 37393856 + concept_name: Neutrophil count + standard_concept: S + standard_concept_caption: Standard + invalid_reason: V + invalid_reason_caption: Valid + concept_code: '1022551000000104' + domain_id: Measurement + vocabulary_id: SNOMED + concept_class_id: Observable Entity + is_excluded: false + include_descendants: false + include_mapped: false + - concept: + concept_id: 4148615 + concept_name: Neutrophil count + standard_concept: S + standard_concept_caption: Standard + invalid_reason: V + invalid_reason_caption: Valid + concept_code: '30630007' + domain_id: Measurement + vocabulary_id: SNOMED + concept_class_id: Procedure + is_excluded: false + include_descendants: false + include_mapped: false + - concept: + concept_id: 3017732 + concept_name: Neutrophils [#/volume] in Blood + standard_concept: S + standard_concept_caption: Standard + invalid_reason: V + invalid_reason_caption: Valid + concept_code: 26499-4 + domain_id: Measurement + vocabulary_id: LOINC + concept_class_id: Lab Test + is_excluded: false + include_descendants: true + include_mapped: false + - concept: + 
concept_id: 3013650 + concept_name: Neutrophils [#/volume] in Blood by Automated count + standard_concept: S + standard_concept_caption: Standard + invalid_reason: V + invalid_reason_caption: Valid + concept_code: 751-8 + domain_id: Measurement + vocabulary_id: LOINC + concept_class_id: Lab Test + is_excluded: false + include_descendants: true + include_mapped: false + - concept: + concept_id: 3017501 + concept_name: Neutrophils [#/volume] in Blood by Manual count + standard_concept: S + standard_concept_caption: Standard + invalid_reason: V + invalid_reason_caption: Valid + concept_code: 753-4 + domain_id: Measurement + vocabulary_id: LOINC + concept_class_id: Lab Test + is_excluded: false + include_descendants: true + include_mapped: false +- id: 27 + name: Anemia or Reticulocytopenia + expression: + items: + - concept: + concept_id: 2617149 + concept_name: Erythropoetic stimulating agent (esa) administered to treat + anemia due to anti-cancer radiotherapy + standard_concept: S + standard_concept_caption: Standard + invalid_reason: V + invalid_reason_caption: Valid + concept_code: EB + domain_id: Observation + vocabulary_id: HCPCS + concept_class_id: HCPCS Modifier + is_excluded: false + include_descendants: true + include_mapped: false + - concept: + concept_id: 36716029 + concept_name: Hyperuricemia, anemia, renal failure syndrome + standard_concept: S + standard_concept_caption: Standard + invalid_reason: V + invalid_reason_caption: Valid + concept_code: '721840000' + domain_id: Condition + vocabulary_id: SNOMED + concept_class_id: Clinical Finding + is_excluded: false + include_descendants: true + include_mapped: false + - concept: + concept_id: 2617148 + concept_name: Erythropoetic stimulating agent (esa) administered to treat + anemia due to anti-cancer chemotherapy + standard_concept: S + standard_concept_caption: Standard + invalid_reason: V + invalid_reason_caption: Valid + concept_code: EA + domain_id: Observation + vocabulary_id: HCPCS + 
concept_class_id: HCPCS Modifier + is_excluded: false + include_descendants: true + include_mapped: false + - concept: + concept_id: 4029669 + concept_name: Refractory anemia with sideroblasts + standard_concept: S + standard_concept_caption: Standard + invalid_reason: V + invalid_reason_caption: Valid + concept_code: '128846006' + domain_id: Observation + vocabulary_id: SNOMED + concept_class_id: Morph Abnormality + is_excluded: false + include_descendants: true + include_mapped: false + - concept: + concept_id: 4120449 + concept_name: von Jaksch's anemia + standard_concept: S + standard_concept_caption: Standard + invalid_reason: V + invalid_reason_caption: Valid + concept_code: '234345001' + domain_id: Condition + vocabulary_id: SNOMED + concept_class_id: Clinical Finding + is_excluded: false + include_descendants: true + include_mapped: false + - concept: + concept_id: 35624756 + concept_name: Anemia due to and following chemotherapy + standard_concept: S + standard_concept_caption: Standard + invalid_reason: V + invalid_reason_caption: Valid + concept_code: '767657005' + domain_id: Condition + vocabulary_id: SNOMED + concept_class_id: Clinical Finding + is_excluded: false + include_descendants: true + include_mapped: false + - concept: + concept_id: 4028718 + concept_name: Refractory anemia with excess blasts + standard_concept: S + standard_concept_caption: Standard + invalid_reason: V + invalid_reason_caption: Valid + concept_code: '128847002' + domain_id: Observation + vocabulary_id: SNOMED + concept_class_id: Morph Abnormality + is_excluded: false + include_descendants: true + include_mapped: false + - concept: + concept_id: 37017165 + concept_name: GATA binding protein 1 related thrombocytopenia with dyserythropoiesis + standard_concept: S + standard_concept_caption: Standard + invalid_reason: V + invalid_reason_caption: Valid + concept_code: '713388002' + domain_id: Condition + vocabulary_id: SNOMED + concept_class_id: Clinical Finding + is_excluded: 
false + include_descendants: true + include_mapped: false + - concept: + concept_id: 4144746 + concept_name: Hereditary hemoglobinopathy + standard_concept: S + standard_concept_caption: Standard + invalid_reason: V + invalid_reason_caption: Valid + concept_code: '427306008' + domain_id: Condition + vocabulary_id: SNOMED + concept_class_id: Clinical Finding + is_excluded: false + include_descendants: true + include_mapped: false + - concept: + concept_id: 2617150 + concept_name: Erythropoetic stimulating agent (esa) administered to treat + anemia not due to anti-cancer radiotherapy or anti-cancer chemotherapy + standard_concept: S + standard_concept_caption: Standard + invalid_reason: V + invalid_reason_caption: Valid + concept_code: EC + domain_id: Observation + vocabulary_id: HCPCS + concept_class_id: HCPCS Modifier + is_excluded: false + include_descendants: true + include_mapped: false + - concept: + concept_id: 44831063 + concept_name: Anemia associated with other specified nutritional deficiency + standard_concept: N + standard_concept_caption: Non-Standard + invalid_reason: V + invalid_reason_caption: Valid + concept_code: '281.8' + domain_id: Condition + vocabulary_id: ICD9CM + concept_class_id: 4-dig billing code + is_excluded: false + include_descendants: false + include_mapped: false + - concept: + concept_id: 4105643 + concept_name: Myasthenic syndrome due to pernicious anemia + standard_concept: S + standard_concept_caption: Standard + invalid_reason: V + invalid_reason_caption: Valid + concept_code: '193213003' + domain_id: Condition + vocabulary_id: SNOMED + concept_class_id: Clinical Finding + is_excluded: false + include_descendants: true + include_mapped: false + - concept: + concept_id: 37398911 + concept_name: Anemia in chronic kidney disease stage 4 + standard_concept: S + standard_concept_caption: Standard + invalid_reason: V + invalid_reason_caption: Valid + concept_code: '691401000119104' + domain_id: Condition + vocabulary_id: SNOMED + 
concept_class_id: Clinical Finding + is_excluded: false + include_descendants: true + include_mapped: false + - concept: + concept_id: 438869 + concept_name: Perinatal jaundice due to hereditary hemolytic anemia + standard_concept: S + standard_concept_caption: Standard + invalid_reason: V + invalid_reason_caption: Valid + concept_code: '56921004' + domain_id: Condition + vocabulary_id: SNOMED + concept_class_id: Clinical Finding + is_excluded: false + include_descendants: true + include_mapped: false + - concept: + concept_id: 4183718 + concept_name: Pericarditis associated with severe chronic anemia + standard_concept: S + standard_concept_caption: Standard + invalid_reason: V + invalid_reason_caption: Valid + concept_code: '43742007' + domain_id: Condition + vocabulary_id: SNOMED + concept_class_id: Clinical Finding + is_excluded: false + include_descendants: true + include_mapped: false + - concept: + concept_id: 37395652 + concept_name: Anemia in chronic kidney disease stage 5 + standard_concept: S + standard_concept_caption: Standard + invalid_reason: V + invalid_reason_caption: Valid + concept_code: '691411000119101' + domain_id: Condition + vocabulary_id: SNOMED + concept_class_id: Clinical Finding + is_excluded: false + include_descendants: true + include_mapped: false + - concept: + concept_id: 4125630 + concept_name: Chronic non-spherocytic hemolytic anemia + standard_concept: S + standard_concept_caption: Standard + invalid_reason: V + invalid_reason_caption: Valid + concept_code: '234402007' + domain_id: Condition + vocabulary_id: SNOMED + concept_class_id: Clinical Finding + is_excluded: false + include_descendants: true + include_mapped: false + - concept: + concept_id: 4217370 + concept_name: Aase syndrome + standard_concept: S + standard_concept_caption: Standard + invalid_reason: V + invalid_reason_caption: Valid + concept_code: '71988008' + domain_id: Condition + vocabulary_id: SNOMED + concept_class_id: Clinical Finding + is_excluded: false + 
include_descendants: true + include_mapped: false + - concept: + concept_id: 4267432 + concept_name: Erythropenia + standard_concept: S + standard_concept_caption: Standard + invalid_reason: V + invalid_reason_caption: Valid + concept_code: '62574001' + domain_id: Condition + vocabulary_id: SNOMED + concept_class_id: Clinical Finding + is_excluded: false + include_descendants: true + include_mapped: false + - concept: + concept_id: 36680584 + concept_name: Autosomal dominant aplasia and myelodysplasia + standard_concept: S + standard_concept_caption: Standard + invalid_reason: V + invalid_reason_caption: Valid + concept_code: '778006008' + domain_id: Condition + vocabulary_id: SNOMED + concept_class_id: Clinical Finding + is_excluded: false + include_descendants: true + include_mapped: false + - concept: + concept_id: 37018722 + concept_name: Anemia caused by zidovudine + standard_concept: S + standard_concept_caption: Standard + invalid_reason: V + invalid_reason_caption: Valid + concept_code: '713496008' + domain_id: Condition + vocabulary_id: SNOMED + concept_class_id: Clinical Finding + is_excluded: false + include_descendants: true + include_mapped: false + - concept: + concept_id: 4295183 + concept_name: Mixed hemoglobin disorder + standard_concept: S + standard_concept_caption: Standard + invalid_reason: V + invalid_reason_caption: Valid + concept_code: '38589006' + domain_id: Condition + vocabulary_id: SNOMED + concept_class_id: Clinical Finding + is_excluded: false + include_descendants: true + include_mapped: false + - concept: + concept_id: 4028717 + concept_name: Refractory anemia + standard_concept: S + standard_concept_caption: Standard + invalid_reason: V + invalid_reason_caption: Valid + concept_code: '128845005' + domain_id: Observation + vocabulary_id: SNOMED + concept_class_id: Morph Abnormality + is_excluded: false + include_descendants: true + include_mapped: false + - concept: + concept_id: 44783626 + concept_name: Pulmonary arterial 
hypertension associated with chronic hemolytic + anemia + standard_concept: S + standard_concept_caption: Standard + invalid_reason: V + invalid_reason_caption: Valid + concept_code: '697908003' + domain_id: Condition + vocabulary_id: SNOMED + concept_class_id: Clinical Finding + is_excluded: false + include_descendants: true + include_mapped: false + - concept: + concept_id: 4159748 + concept_name: Hand-foot syndrome in sickle cell anemia + standard_concept: S + standard_concept_caption: Standard + invalid_reason: V + invalid_reason_caption: Valid + concept_code: '371104006' + domain_id: Condition + vocabulary_id: SNOMED + concept_class_id: Clinical Finding + is_excluded: false + include_descendants: true + include_mapped: false + - concept: + concept_id: 4051681 + concept_name: Reticulocytopenia + standard_concept: S + standard_concept_caption: Standard + invalid_reason: V + invalid_reason_caption: Valid + concept_code: '124961001' + domain_id: Condition + vocabulary_id: SNOMED + concept_class_id: Clinical Finding + is_excluded: false + include_descendants: true + include_mapped: false + - concept: + concept_id: 37017132 + concept_name: Anemia co-occurrent with human immunodeficiency virus infection + standard_concept: S + standard_concept_caption: Standard + invalid_reason: V + invalid_reason_caption: Valid + concept_code: '713349004' + domain_id: Condition + vocabulary_id: SNOMED + concept_class_id: Clinical Finding + is_excluded: false + include_descendants: true + include_mapped: false + - concept: + concept_id: 4029670 + concept_name: Refractory anemia with excess blasts in transformation + standard_concept: S + standard_concept_caption: Standard + invalid_reason: V + invalid_reason_caption: Valid + concept_code: '128848007' + domain_id: Observation + vocabulary_id: SNOMED + concept_class_id: Morph Abnormality + is_excluded: false + include_descendants: true + include_mapped: false + - concept: + concept_id: 4006467 + concept_name: Anemia due to infection + 
standard_concept: S + standard_concept_caption: Standard + invalid_reason: V + invalid_reason_caption: Valid + concept_code: '111570005' + domain_id: Condition + vocabulary_id: SNOMED + concept_class_id: Clinical Finding + is_excluded: false + include_descendants: true + include_mapped: false + - concept: + concept_id: 40478891 + concept_name: Erythropoietin resistance in anemia of chronic kidney disease + standard_concept: S + standard_concept_caption: Standard + invalid_reason: V + invalid_reason_caption: Valid + concept_code: '444271000' + domain_id: Condition + vocabulary_id: SNOMED + concept_class_id: Clinical Finding + is_excluded: false + include_descendants: true + include_mapped: false + - concept: + concept_id: 36715584 + concept_name: Refractory anemia with ringed sideroblasts associated with marked + thrombocytosis + standard_concept: S + standard_concept_caption: Standard + invalid_reason: V + invalid_reason_caption: Valid + concept_code: '721302006' + domain_id: Condition + vocabulary_id: SNOMED + concept_class_id: Clinical Finding + is_excluded: false + include_descendants: true + include_mapped: false +- id: 28 + name: Hemoglobin measurement + expression: + items: + - concept: + concept_id: 3000963 + concept_name: Hemoglobin [Mass/volume] in Blood + standard_concept: S + standard_concept_caption: Standard + invalid_reason: V + invalid_reason_caption: Valid + concept_code: 718-7 + domain_id: Measurement + vocabulary_id: LOINC + concept_class_id: Lab Test + is_excluded: false + include_descendants: true + include_mapped: false + - concept: + concept_id: 3027484 + concept_name: Hemoglobin [Mass/volume] in Blood by calculation + standard_concept: S + standard_concept_caption: Standard + invalid_reason: V + invalid_reason_caption: Valid + concept_code: 20509-6 + domain_id: Measurement + vocabulary_id: LOINC + concept_class_id: Lab Test + is_excluded: false + include_descendants: true + include_mapped: false +- id: 30 + name: Immune Thrombocytopenia + 
expression: + items: + - concept: + concept_id: 4103532 + concept_name: Immune thrombocytopenia + standard_concept: S + standard_concept_caption: Standard + invalid_reason: V + invalid_reason_caption: Valid + concept_code: '2897005' + domain_id: Condition + vocabulary_id: SNOMED + concept_class_id: Clinical Finding + is_excluded: false + include_descendants: true + include_mapped: false + - concept: + concept_id: 4119134 + concept_name: Thrombocytopenic purpura + standard_concept: S + standard_concept_caption: Standard + invalid_reason: V + invalid_reason_caption: Valid + concept_code: '302873008' + domain_id: Condition + vocabulary_id: SNOMED + concept_class_id: Clinical Finding + is_excluded: false + include_descendants: true + include_mapped: false +qualified_limit: + type: First +expression_limit: + type: All +inclusion_rules: +- name: No congenital or genetic thrombocytopenia + expression: + type: ALL + criteria_list: + - criteria: + condition_occurrence: + codeset_id: 9 + condition_type_exclude: false + start_window: + start: + coeff: -1 + end: + days: 7 + coeff: 1 + use_index_end: false + use_event_end: false + restrict_visit: false + ignore_observation_period: true + occurrence: + type: 0 + count: 0 + is_distinct: false + demographic_criteria_list: [] + groups: [] +- name: No Platelet count > 100 on index date + expression: + type: ALL + criteria_list: + - criteria: + measurement: + codeset_id: 7 + measurement_type_exclude: false + value_as_number: + value: 101 + op: bt + extent: 450 + unit: + - concept_id: 8848 + concept_name: thousand per microliter + standard_concept: null + standard_concept_caption: Unknown + invalid_reason: null + invalid_reason_caption: Unknown + concept_code: 10*3/uL + domain_id: Unit + vocabulary_id: UCUM + concept_class_id: null + - concept_id: 8961 + concept_name: thousand per cubic millimeter + standard_concept: null + standard_concept_caption: Unknown + invalid_reason: null + invalid_reason_caption: Unknown + concept_code: 
10*3/mm3 + domain_id: Unit + vocabulary_id: UCUM + concept_class_id: null + - concept_id: 9444 + concept_name: billion per liter + standard_concept: null + standard_concept_caption: Unknown + invalid_reason: null + invalid_reason_caption: Unknown + concept_code: 10*9/L + domain_id: Unit + vocabulary_id: UCUM + concept_class_id: null + - concept_id: 8816 + concept_name: million per milliliter + standard_concept: null + standard_concept_caption: Unknown + invalid_reason: null + invalid_reason_caption: Unknown + concept_code: 10*6/mL + domain_id: Unit + vocabulary_id: UCUM + concept_class_id: null + - concept_id: 44777588 + concept_name: billion cells per liter + standard_concept: null + standard_concept_caption: Unknown + invalid_reason: null + invalid_reason_caption: Unknown + concept_code: 10*9.{cellls}/L + domain_id: Unit + vocabulary_id: UCUM + concept_class_id: null + start_window: + start: + days: 0 + coeff: -1 + end: + days: 0 + coeff: 1 + use_index_end: false + use_event_end: false + restrict_visit: false + ignore_observation_period: true + occurrence: + type: 0 + count: 0 + is_distinct: false + demographic_criteria_list: [] + groups: [] +- name: No thrombocytosis on index date + expression: + type: ALL + criteria_list: + - criteria: + condition_occurrence: + codeset_id: 10 + condition_type_exclude: false + start_window: + start: + days: 0 + coeff: -1 + end: + days: 0 + coeff: 1 + use_index_end: false + use_event_end: false + restrict_visit: false + ignore_observation_period: true + occurrence: + type: 0 + count: 0 + is_distinct: false + demographic_criteria_list: [] + groups: [] +- name: No Pancytopenia or bone marrow disorder diagnosis within 7 days + expression: + type: ALL + criteria_list: + - criteria: + condition_occurrence: + codeset_id: 24 + condition_type_exclude: false + start_window: + start: + days: 7 + coeff: -1 + end: + days: 7 + coeff: 1 + use_index_end: false + use_event_end: false + restrict_visit: false + ignore_observation_period: true + 
occurrence: + type: 0 + count: 0 + is_distinct: false + demographic_criteria_list: [] + groups: [] +- name: No Neutropenia, Agranulocytosis diagnosis within 7 days + expression: + type: ALL + criteria_list: + - criteria: + condition_occurrence: + codeset_id: 25 + condition_type_exclude: false + start_window: + start: + days: 7 + coeff: -1 + end: + days: 7 + coeff: 1 + use_index_end: false + use_event_end: false + restrict_visit: false + ignore_observation_period: true + occurrence: + type: 0 + count: 0 + is_distinct: false + demographic_criteria_list: [] + groups: [] +- name: No low neutrophil count within 7 days + expression: + type: ALL + criteria_list: + - criteria: + measurement: + codeset_id: 26 + measurement_type_exclude: false + value_as_number: + value: 0.01 + op: bt + extent: 1.499 + unit: + - concept_id: 9444 + concept_name: billion per liter + standard_concept: null + standard_concept_caption: Unknown + invalid_reason: null + invalid_reason_caption: Unknown + concept_code: 10*9/L + domain_id: Unit + vocabulary_id: UCUM + concept_class_id: null + - concept_id: 8848 + concept_name: thousand per microliter + standard_concept: null + standard_concept_caption: Unknown + invalid_reason: null + invalid_reason_caption: Unknown + concept_code: 10*3/uL + domain_id: Unit + vocabulary_id: UCUM + concept_class_id: null + - concept_id: 8816 + concept_name: million per milliliter + standard_concept: null + standard_concept_caption: Unknown + invalid_reason: null + invalid_reason_caption: Unknown + concept_code: 10*6/mL + domain_id: Unit + vocabulary_id: UCUM + concept_class_id: null + - concept_id: 8961 + concept_name: thousand per cubic millimeter + standard_concept: null + standard_concept_caption: Unknown + invalid_reason: null + invalid_reason_caption: Unknown + concept_code: 10*3/mm3 + domain_id: Unit + vocabulary_id: UCUM + concept_class_id: null + - concept_id: 44777588 + concept_name: billion cells per liter + standard_concept: null + standard_concept_caption: 
Unknown + invalid_reason: null + invalid_reason_caption: Unknown + concept_code: 10*9.{cellls}/L + domain_id: Unit + vocabulary_id: UCUM + concept_class_id: null + range_low: + value: 1.5 + op: bt + extent: 4 + start_window: + start: + days: 7 + coeff: -1 + end: + days: 7 + coeff: -1 + use_index_end: false + use_event_end: false + restrict_visit: false + ignore_observation_period: true + occurrence: + type: 0 + count: 0 + is_distinct: false + - criteria: + measurement: + codeset_id: 26 + measurement_type_exclude: false + value_as_number: + value: 10 + op: bt + extent: 1500 + unit: + - concept_id: 8784 + concept_name: cells per microliter + standard_concept: null + standard_concept_caption: Unknown + invalid_reason: null + invalid_reason_caption: Unknown + concept_code: '{cells}/uL' + domain_id: Unit + vocabulary_id: UCUM + concept_class_id: null + - concept_id: 8647 + concept_name: per microliter + standard_concept: null + standard_concept_caption: Unknown + invalid_reason: null + invalid_reason_caption: Unknown + concept_code: /uL + domain_id: Unit + vocabulary_id: UCUM + concept_class_id: null + start_window: + start: + days: 7 + coeff: -1 + end: + days: 7 + coeff: 1 + use_index_end: false + use_event_end: false + restrict_visit: false + ignore_observation_period: true + occurrence: + type: 0 + count: 0 + is_distinct: false + demographic_criteria_list: [] + groups: [] +- name: No Anemia diagnosis within 7 days + expression: + type: ALL + criteria_list: + - criteria: + condition_occurrence: + codeset_id: 27 + condition_type_exclude: false + start_window: + start: + days: 7 + coeff: -1 + end: + days: 7 + coeff: 1 + use_index_end: false + use_event_end: false + restrict_visit: false + ignore_observation_period: true + occurrence: + type: 0 + count: 0 + is_distinct: false + - criteria: + observation: + codeset_id: 27 + observation_type_exclude: false + start_window: + start: + days: 7 + coeff: -1 + end: + days: 7 + coeff: 1 + use_index_end: false + use_event_end: 
false + restrict_visit: false + ignore_observation_period: true + occurrence: + type: 0 + count: 0 + is_distinct: false + demographic_criteria_list: [] + groups: [] +- name: No low Hemoglobin measurement in blood within 7 days + expression: + type: ALL + criteria_list: + - criteria: + measurement: + codeset_id: 28 + measurement_type_exclude: false + value_as_number: + value: 4 + op: bt + extent: 11 + unit: + - concept_id: 4121395 + concept_name: g/dL + standard_concept: null + standard_concept_caption: Unknown + invalid_reason: null + invalid_reason_caption: Unknown + concept_code: '258795003' + domain_id: Unit + vocabulary_id: SNOMED + concept_class_id: null + - concept_id: 8713 + concept_name: gram per deciliter + standard_concept: null + standard_concept_caption: Unknown + invalid_reason: null + invalid_reason_caption: Unknown + concept_code: g/dL + domain_id: Unit + vocabulary_id: UCUM + concept_class_id: null + - concept_id: 8950 + concept_name: gram per deciliter calculated + standard_concept: null + standard_concept_caption: Unknown + invalid_reason: null + invalid_reason_caption: Unknown + concept_code: g/dL{calc} + domain_id: Unit + vocabulary_id: UCUM + concept_class_id: null + start_window: + start: + days: 7 + coeff: -1 + end: + days: 7 + coeff: 1 + use_index_end: false + use_event_end: false + restrict_visit: false + ignore_observation_period: true + occurrence: + type: 0 + count: 0 + is_distinct: false + demographic_criteria_list: [] + groups: [] +end_strategy: + date_offset: + date_field: EndDate + offset: 0 +censoring_criteria: +- measurement: + codeset_id: 7 + measurement_type_exclude: false + value_as_number: + value: 150 + op: bt + extent: 450 + unit: + - concept_id: 8848 + concept_name: thousand per microliter + standard_concept: null + standard_concept_caption: Unknown + invalid_reason: null + invalid_reason_caption: Unknown + concept_code: 10*3/uL + domain_id: Unit + vocabulary_id: UCUM + concept_class_id: null + - concept_id: 8961 + 
concept_name: thousand per cubic millimeter + standard_concept: null + standard_concept_caption: Unknown + invalid_reason: null + invalid_reason_caption: Unknown + concept_code: 10*3/mm3 + domain_id: Unit + vocabulary_id: UCUM + concept_class_id: null + - concept_id: 9444 + concept_name: billion per liter + standard_concept: null + standard_concept_caption: Unknown + invalid_reason: null + invalid_reason_caption: Unknown + concept_code: 10*9/L + domain_id: Unit + vocabulary_id: UCUM + concept_class_id: null + - concept_id: 8816 + concept_name: million per milliliter + standard_concept: null + standard_concept_caption: Unknown + invalid_reason: null + invalid_reason_caption: Unknown + concept_code: 10*6/mL + domain_id: Unit + vocabulary_id: UCUM + concept_class_id: null + - concept_id: 44777588 + concept_name: billion cells per liter + standard_concept: null + standard_concept_caption: Unknown + invalid_reason: null + invalid_reason_caption: Unknown + concept_code: 10*9.{cellls}/L + domain_id: Unit + vocabulary_id: UCUM + concept_class_id: null +- condition_occurrence: + codeset_id: 10 + condition_type_exclude: false +collapse_settings: + collapse_type: ERA + era_pad: 0 +censor_window: {} diff --git a/tests/test_yaml_cohorts.py b/tests/test_yaml_cohorts.py new file mode 100644 index 00000000..f3fcf6b8 --- /dev/null +++ b/tests/test_yaml_cohorts.py @@ -0,0 +1,355 @@ +"""Tests for YAML cohort support with snake_case naming.""" + +import json +from pathlib import Path +from tempfile import TemporaryDirectory + +import pytest +import yaml + +from circe.api import build_cohort_query, cohort_expression_from_json, cohort_expression_from_yaml +from circe.cohortdefinition import BuildExpressionQueryOptions +from circe.cohortdefinition.yaml_utils import ( + cohort_expression_to_snake_case, + dict_to_pascal_case, + dict_to_snake_case, + snake_case_dict_to_cohort_expression, + to_pascal_case, + to_snake_case, +) +from circe.io import load_expression, save_expression_as_yaml + + 
+class TestCaseConversion: + """Test case conversion utilities.""" + + def test_to_snake_case_pascal_case(self): + """Test converting PascalCase to snake_case.""" + assert to_snake_case("PrimaryCriteria") == "primary_criteria" + assert to_snake_case("ConceptSets") == "concept_sets" + assert to_snake_case("CodesetId") == "codeset_id" + assert to_snake_case("CohortExpression") == "cohort_expression" + + def test_to_snake_case_camel_case(self): + """Test converting camelCase to snake_case.""" + assert to_snake_case("primaryCriteria") == "primary_criteria" + assert to_snake_case("conceptSets") == "concept_sets" + assert to_snake_case("codesetId") == "codeset_id" + + def test_to_snake_case_with_numbers(self): + """Test converting with numbers.""" + assert to_snake_case("CodesetId") == "codeset_id" + assert to_snake_case("Concept1Id") == "concept1_id" + + def test_to_snake_case_all_caps(self): + """Test converting ALL_CAPS and ID suffixes.""" + # CONCEPT_ID already has underscores, just gets lowercased + assert to_snake_case("CONCEPT_ID") == "concept_id" + # ConceptID gets underscores before capitals and lowercased + assert to_snake_case("ConceptID") == "concept_id" + + def test_to_pascal_case(self): + """Test converting snake_case to PascalCase.""" + assert to_pascal_case("primary_criteria") == "PrimaryCriteria" + assert to_pascal_case("concept_sets") == "ConceptSets" + assert to_pascal_case("codeset_id") == "CodesetId" + + def test_dict_to_snake_case_simple(self): + """Test converting dict keys to snake_case.""" + data = {"PrimaryCriteria": "value", "ConceptSets": []} + result = dict_to_snake_case(data) + assert "primary_criteria" in result + assert "concept_sets" in result + assert result["primary_criteria"] == "value" + + def test_dict_to_snake_case_nested(self): + """Test converting nested dict keys to snake_case.""" + data = {"PrimaryCriteria": {"CriteriaList": [{"ConditionOccurrence": {"CodesetId": 1}}]}} + result = dict_to_snake_case(data) + assert 
"primary_criteria" in result + assert "criteria_list" in result["primary_criteria"] + assert isinstance(result["primary_criteria"]["criteria_list"], list) + assert "condition_occurrence" in result["primary_criteria"]["criteria_list"][0] + + def test_dict_to_pascal_case_simple(self): + """Test converting dict keys back to PascalCase.""" + data = {"primary_criteria": "value", "concept_sets": []} + result = dict_to_pascal_case(data) + assert "PrimaryCriteria" in result + assert "ConceptSets" in result + + def test_dict_to_pascal_case_nested(self): + """Test converting nested dict keys back to PascalCase.""" + data = {"primary_criteria": {"criteria_list": [{"condition_occurrence": {"codeset_id": 1}}]}} + result = dict_to_pascal_case(data) + assert "PrimaryCriteria" in result + assert "CriteriaList" in result["PrimaryCriteria"] + + +class TestYAMLParsing: + """Test YAML parsing and conversion.""" + + @pytest.fixture + def example_json_cohort(self): + """Load the example JSON cohort from tests.""" + cohorts_dir = Path(__file__).parent / "cohorts" + json_file = cohorts_dir / "isolated_immune_thrombocytopenia.json" + if json_file.exists(): + return json.loads(json_file.read_text()) + # Return minimal valid cohort if file doesn't exist + return {"concept_sets": [], "primary_criteria": None} + + def test_cohort_expression_from_yaml_simple(self): + """Test parsing simple YAML cohort.""" + yaml_str = """ +title: "Test Cohort" +concept_sets: [] +primary_criteria: null +""" + expr = cohort_expression_from_yaml(yaml_str) + assert expr.title == "Test Cohort" + assert expr.concept_sets == [] + + def test_cohort_expression_from_yaml_with_criteria(self): + """Test parsing YAML with more complex structure.""" + yaml_str = """ +title: "Test Cohort" +concept_sets: + - id: 1 + name: "Test Concept Set" + expression: + items: [] + is_excluded: false + include_descendants: false + include_mapped: false +primary_criteria: null +""" + expr = cohort_expression_from_yaml(yaml_str) + assert 
expr.title == "Test Cohort" + assert len(expr.concept_sets) == 1 + assert expr.concept_sets[0].id == 1 + assert expr.concept_sets[0].name == "Test Concept Set" + + def test_cohort_expression_to_snake_case(self, example_json_cohort): + """Test converting CohortExpression to snake_case dict.""" + import json + + from circe.api import cohort_expression_from_json + + json_str = json.dumps(example_json_cohort) + expr = cohort_expression_from_json(json_str) + result = cohort_expression_to_snake_case(expr) + + # Check that keys are in snake_case + assert isinstance(result, dict) + # Should not have PascalCase keys at top level + assert "PrimaryCriteria" not in result + assert "primary_criteria" in result or result == {} + + def test_snake_case_dict_to_cohort_expression(self): + """Test converting snake_case dict to CohortExpression.""" + data = { + "title": "Test Cohort", + "concept_sets": [ + { + "id": 1, + "name": "Test", + "expression": { + "items": [], + "is_excluded": False, + "include_descendants": False, + "include_mapped": False, + }, + } + ], + } + expr = snake_case_dict_to_cohort_expression(data) + assert expr.title == "Test Cohort" + assert len(expr.concept_sets) == 1 + + +class TestYAMLIO: + """Test YAML file I/O operations.""" + + def test_save_expression_as_yaml(self): + """Test saving CohortExpression to YAML file.""" + + yaml_str = """ +title: "Test Cohort" +concept_sets: [] +""" + expr = cohort_expression_from_yaml(yaml_str) + + with TemporaryDirectory() as tmpdir: + output_path = Path(tmpdir) / "test_cohort.yaml" + save_expression_as_yaml(expr, output_path) + + assert output_path.exists() + content = output_path.read_text() + assert "test_cohort" in content.lower() or "Test Cohort" in content + + def test_load_expression_yaml_file(self): + """Test loading YAML file via load_expression.""" + yaml_content = """ +title: "Test Cohort" +concept_sets: [] +""" + with TemporaryDirectory() as tmpdir: + yaml_path = Path(tmpdir) / "test.yaml" + 
yaml_path.write_text(yaml_content) + + expr = load_expression(yaml_path) + assert expr.title == "Test Cohort" + + def test_load_expression_yml_file(self): + """Test loading .yml file extension.""" + yaml_content = """ +title: "Test Cohort" +concept_sets: [] +""" + with TemporaryDirectory() as tmpdir: + yaml_path = Path(tmpdir) / "test.yml" + yaml_path.write_text(yaml_content) + + expr = load_expression(yaml_path) + assert expr.title == "Test Cohort" + + def test_load_expression_json_still_works(self): + """Test that JSON files still work via load_expression.""" + json_content = '{"title": "JSON Cohort", "conceptSets": []}' + + with TemporaryDirectory() as tmpdir: + json_path = Path(tmpdir) / "test.json" + json_path.write_text(json_content) + + expr = load_expression(json_path) + assert expr.title == "JSON Cohort" + + +class TestRoundTrip: + """Test round-trip conversions.""" + + def test_yaml_to_json_roundtrip(self): + """Test converting YAML -> JSON and back.""" + yaml_str = """ +title: "Round Trip Test" +concept_sets: [] +primary_criteria: null +""" + # Load from YAML + expr1 = cohort_expression_from_yaml(yaml_str) + + # Convert to dict and back + snake_dict = cohort_expression_to_snake_case(expr1) + expr2 = snake_case_dict_to_cohort_expression(snake_dict) + + assert expr1.title == expr2.title + assert expr1.concept_sets == expr2.concept_sets + + def test_json_to_yaml_to_json(self): + """Test converting JSON -> YAML -> JSON preserves equivalence.""" + # Create a minimal but valid cohort with primary_criteria + example_json_cohort = { + "title": "Round Trip Test", + "concept_sets": [], + "primary_criteria": { + "criteria_list": [], + "observation_window": {"prior_days": 0, "post_days": 0}, + "primary_criteria_limit": {"type": "All"}, + }, + } + + import json + + # Load from JSON + json_str = json.dumps(example_json_cohort) + expr1 = cohort_expression_from_json(json_str) + + # Save to YAML and reload + with TemporaryDirectory() as tmpdir: + yaml_path = Path(tmpdir) 
/ "temp.yaml" + save_expression_as_yaml(expr1, yaml_path) + expr2 = load_expression(yaml_path) + + # Both should have same title + assert expr1.title == expr2.title + + # Both should be able to generate SQL with same options + options = BuildExpressionQueryOptions() + sql1 = build_cohort_query(expr1, options) + sql2 = build_cohort_query(expr2, options) + # SQL should be identical for same input + assert sql1 == sql2 + + def test_yaml_preserves_snake_case_on_roundtrip(self): + """Test that YAML round-trip preserves snake_case formatting.""" + yaml_str = """ +title: "Snake Case Test" +concept_sets: [] +inclusion_rules: [] +""" + expr = cohort_expression_from_yaml(yaml_str) + + with TemporaryDirectory() as tmpdir: + yaml_path = Path(tmpdir) / "temp.yaml" + save_expression_as_yaml(expr, yaml_path) + content = yaml_path.read_text() + + # Should have snake_case keys + data = yaml.safe_load(content) + # Find a key that should be in snake_case + assert any(key for key in data if "_" in key or key in ["title"]) + + +class TestCLIIntegration: + """Test CLI commands with YAML files.""" + + def test_yaml_file_with_validate_command(self): + """Test validate command with YAML input.""" + yaml_content = """ +title: "CLI Test Cohort" +concept_sets: [] +""" + with TemporaryDirectory() as tmpdir: + yaml_path = Path(tmpdir) / "test.yaml" + yaml_path.write_text(yaml_content) + + # load_expression should handle it + expr = load_expression(yaml_path) + assert expr.title == "CLI Test Cohort" + + def test_yaml_file_with_sql_generation(self): + """Test SQL generation from YAML input.""" + yaml_content = """ +title: "SQL Generation Test" +concept_sets: [] +primary_criteria: + criteria_list: [] + observation_window: + prior_days: 0 + post_days: 0 + primary_criteria_limit: + type: "All" +""" + with TemporaryDirectory() as tmpdir: + yaml_path = Path(tmpdir) / "test.yaml" + yaml_path.write_text(yaml_content) + + expr = load_expression(yaml_path) + options = BuildExpressionQueryOptions() + sql = 
build_cohort_query(expr, options) + + assert isinstance(sql, str) + # Should contain some SQL + assert len(sql) > 0 + + +@pytest.fixture +def example_json_cohort(): + """Load the example JSON cohort from tests.""" + cohorts_dir = Path(__file__).parent / "cohorts" + json_file = cohorts_dir / "isolated_immune_thrombocytopenia.json" + if json_file.exists(): + return json.loads(json_file.read_text()) + # Return minimal valid cohort if file doesn't exist + return {"concept_sets": [], "primary_criteria": None}