OHDSI · rkboyce · May 14, 2026 · May 14, 2026 · May 14, 2026 · May 14, 2026
diff --git a/R/slashOhdsiStrategusAssistant/R/keeper_review_workflow.R b/R/slashOhdsiStrategusAssistant/R/keeper_review_workflow.R
diff --git a/R/slashOhdsiStrategusAssistant/R/strategus_cohort_methods_shell.R b/R/slashOhdsiStrategusAssistant/R/strategus_cohort_methods_shell.R
diff --git a/R/slashOhdsiStrategusAssistant/R/strategus_incidence_shell.R b/R/slashOhdsiStrategusAssistant/R/strategus_incidence_shell.R
diff --git a/R/slashOhdsiStrategusAssistant/R/workflow_dialogue.R b/R/slashOhdsiStrategusAssistant/R/workflow_dialogue.R
@@ -12,6 +12,10 @@ compact_workflow_dialogue_context <- function(value) {
   value[keep_idx]
 }
 
+new_workflow_navigation_signal <- function(action) {
+  structure(list(action = as.character(action %||% "")), class = "workflow_navigation_signal")
+}
+
 #' Construct mutable dialogue state for interactive workflow guidance
 #' @return environment storing current step, role, and compact context
 #' @export
@@ -124,9 +128,13 @@ new_workflow_dialogue_session <- function(interactive = TRUE,
     list(handled = TRUE, value = "")
   }
 
-  readline_with_dialogue <- function(prompt) {
+  readline_with_dialogue <- function(prompt, allow_back = FALSE) {
     repeat {
       entered <- readline(prompt)
+      trimmed <- trimws(as.character(entered %||% ""))
+      if (isTRUE(allow_back) && identical(trimmed, "/back")) {
+        return(new_workflow_navigation_signal("back"))
+      }
       handled <- handle_command(entered)
       if (isTRUE(handled$handled)) next
       return(handled$value)

diff --git a/R/slashOhdsiStrategusAssistant/R/workflow_dialogue_mapping.R b/R/slashOhdsiStrategusAssistant/R/workflow_dialogue_mapping.R
@@ -13,6 +13,10 @@ normalize_incidence_dialogue_step <- function(step) {
     outcome_recommendation_resume = "outcome_selection",
     outcome_advice_call = "outcome_selection",
     outcome_improvements = "phenotype_review",
+    keeper_concept_set_generation_before = "keeper_concept_set_generation",
+    keeper_concept_set_generation_after = "keeper_concept_set_generation",
+    keeper_case_review_before = "keeper_case_review",
+    keeper_case_review_after = "keeper_case_review",
     step
   )
   as.character(mapped %||% "")
@@ -85,6 +89,10 @@ normalize_cohort_methods_dialogue_step <- function(step) {
     comparator_improvements = "phenotype_review",
     outcome_improvements = "phenotype_review",
     analytic_settings_step_by_step = "analytic_settings_collection",
+    keeper_concept_set_generation_before = "keeper_concept_set_generation",
+    keeper_concept_set_generation_after = "keeper_concept_set_generation",
+    keeper_case_review_before = "keeper_case_review",
+    keeper_case_review_after = "keeper_case_review",
     step
   )
   as.character(mapped %||% "")

diff --git a/R/slashOhdsiStrategusAssistant/README.md b/R/slashOhdsiStrategusAssistant/README.md
@@ -22,7 +22,8 @@ Primary entrypoints:
 Current shell details:
 
 - the incidence shell persists explicit TAR and strata settings to `analysis-settings/time_at_risk_settings.json`
-- both Strategus shells can run or generate ACP-based Keeper review with `keeper_review_state.json` reuse/resume artifacts
+- both Strategus shells support `/back` at major stage boundaries while preserving `/ohdsi` dialogue prompts inside the workflow
+- both Strategus shells can run or generate ACP-based Keeper review with `keeper_review_state.json` artifacts and bounded inline Keeper gates for domain generation and case review
 - generated Keeper scripts expose `ACP_TIMEOUT`, concept-set reuse/overwrite controls, and explicit row selection controls
 
 It depends on `slashOhdsiAcpClient` for ACP calls.
diff --git a/docs/R_STRATEGUS_COHORT_METHODS_SHELL.md b/docs/R_STRATEGUS_COHORT_METHODS_SHELL.md
@@ -14,7 +14,7 @@ This shell is provided as `slashOhdsiStrategusAssistant::runStrategusCohortMetho
 
 Usage examples for `slashOhdsiStrategusAssistant::runStrategusCohortMethodsShell()` live in the R package README: `R/slashOhdsiStrategusAssistant/README.md`.
 
-Workflow diagrams live in `docs/COHORT_METHODS_WORKFLOW.md`.
+Workflow diagrams live in `docs/WORKFLOW_COHORT_METHODS.md`.
 
 ## Current Stage Flow
 
@@ -27,14 +27,14 @@ Workflow diagrams live in `docs/COHORT_METHODS_WORKFLOW.md`.
    - when multiple outcome statements are suggested interactively, choose the subset to keep or enter none/0 to provide a manual outcome before editing or adding statements
 3. Role-specific phenotype recommendation / cache reuse for target, comparator, and outcome cohorts.
    Interactive runs ask for short analysis labels for selected cohorts and the comparison; labels must
-   be 50 characters or fewer because downstream Strategus/Characterization result tables use short
+   be 100 characters or fewer because downstream Strategus/Characterization result tables use short
    identifier fields.
 4. Optional cohort ID remap step to avoid collisions (`remapCohortIds`).
 5. Copy cohort JSON definitions from `indexDir/definitions` into selected cohort folders.
 6. Optional negative control and covariate concept-set IDs are still captured as placeholders.
 7. Configure one analytic-settings profile through `step_by_step`, `free_text`, or cached/function-argument inputs.
    Analytic settings are always collected in this stage and confirmed before finalization.
-8. Optionally run ACP-based Keeper review inline with reuse/resume controls.
+8. Optionally run ACP-based Keeper review inline with reuse/resume controls and bounded Keeper stage gates around domain generation and case review.
 9. Generate scripts in `scripts/` for cohort generation, Keeper review, diagnostics, and CohortMethod spec/execution.
 
 ## Analytic Settings
@@ -149,8 +149,8 @@ Generated scripts that connect to the database expect these site-specific files
   "resultsDatabaseSchema": "results_schema",
   "vocabularyDatabaseSchema": "vocab_schema",
   "cohortTable": "cohort",
-  "workFolder": "demo-strategus-cohort-incidence/work",
-  "resultsFolder": "demo-strategus-cohort-incidence/results",
+  "workFolder": "demo-strategus-cohort-method/work",
+  "resultsFolder": "demo-strategus-cohort-method/results",
   "cohortIdFieldName": "cohort_definition_id"
 }
 ```
@@ -160,6 +160,7 @@ Current Keeper specifics:
 
 - `scripts/04_keeper_review.R` uses `runKeeperReviewWorkflow(...)` and ACP flows instead of the legacy Keeper R package.
 - The script records state in `outputs/keeper_review_state.json`.
+- Inline Keeper review now exposes bounded stage gates before and after each requested concept-set domain and before and after case review.
 - The default generated script exposes `ACP_TIMEOUT`, concept-set reuse/overwrite, row reuse/resume, and explicit row selection controls such as `1-3,5`.
 - Manual editing of `keeper-case-review/concept-sets-approved/*.json` is consumable, but the concept-set approve/edit/rerun UX is still incomplete.
 
@@ -182,3 +183,5 @@ Current Keeper specifics:
 ## Notes
 
 - This stage is designed as a bridge: it combines ACP/MCP-assisted intent split, phenotype recommendation/improvement, analytic-settings recommendation, and ACP-based Keeper review with reproducible Strategus script generation.
+- Interactive runs support `/back` at major stage boundaries for study intent, target selection, comparator selection, outcome selection, study configuration, and Keeper-review entry while keeping `/ohdsi` available for contextual guidance.
+- If no Keeper artifacts exist yet, the shell suppresses the inline Keeper reuse/resume prompts instead of asking about caches unconditionally.
diff --git a/docs/R_STRATEGUS_INCIDENCE_SHELL.md b/docs/R_STRATEGUS_INCIDENCE_SHELL.md
@@ -14,7 +14,8 @@ generation for a CohortIncidence analysis.
 - Lets the user select accepted target/outcome phenotypes and optionally remap cohort IDs.
 - Calls `phenotype_improvements` for each selected cohort and lets the user apply improvements immediately.
 - Captures explicit time-at-risk and strata settings for the incidence analysis.
-- Optionally runs ACP-based Keeper review inline or writes a standalone Keeper script.
+- Supports `/back` at major stage boundaries while keeping `/ohdsi` dialogue available during the workflow.
+- Optionally runs ACP-based Keeper review inline or writes a standalone Keeper script. Inline Keeper runs now expose bounded review gates before and after each concept-set domain and before and after case review.
 - Writes reproducible scripts for recommendation replay, cohort generation, Keeper review, diagnostics, and incidence analysis.
 - Saves session state to `outputs/study_agent_state.json` for traceability.
 
@@ -86,5 +87,7 @@ Generated scripts that connect to the database expect these site-specific files
 ## Notes
 
 - If improvements were applied during the shell session, the scripts are a portable record and do not need to re-apply the same changes.
-- The shell exposes a `/ohdsi` dialogue step for `time_at_risk_configuration`, so users can ask denominator-design questions while configuring TAR and strata settings.
+- The shell exposes `/ohdsi` guidance throughout the workflow and supports `/back` at the major stage boundaries for study intent, target selection, outcome selection, TAR confirmation, and Keeper-review entry.
+- Inline Keeper review uses bounded stage gates rather than a fully generic rewind. Users can skip or rerun domains, inspect generated artifacts, adjust review settings, and inspect saved reviewed rows.
+- If no Keeper artifacts exist yet, the shell now suppresses the reuse/resume prompts instead of asking about caches unconditionally.
 - If the initial phenotype recommendations are not acceptable, the shell can request a second window of candidates and then fall back to advisory guidance.
diff --git a/docs/WORKFLOW_COHORT_METHODS.md b/docs/WORKFLOW_COHORT_METHODS.md
@@ -65,7 +65,7 @@ flowchart TD
 
   AJ --> AT["Confirm Analytic Settings"]
   AS --> AT
-  AT --> AU["Optional inline ACP Keeper review"]
+  AT --> AU["Optional inline ACP Keeper review with bounded stage gates"]
   AU --> AV["Write Outputs + Generate Scripts 02-06"]
   AV --> AW["End"]
 ```

diff --git a/scripts/demo_strategus_cohort_method.R b/scripts/demo_strategus_cohort_method.R
@@ -15,13 +15,16 @@ source(file.path(script_dir, "demo_setup.R"))
 repo_root <- set_study_agent_repo_root(start = dirname(script_dir))
 load_study_agent_r_packages(include_strategus = TRUE)
 
-Sys.setenv(ACP_TIMEOUT = "180")
+Sys.setenv(ACP_TIMEOUT = "1800") # set high because of detailed keeper concept set extraction 
 Sys.setenv(PHENOTYPE_INDEX_DIR = repo_file("data", "phenotype_index_cipher_omop"))
 invisible(connect_study_agent_acp())
 
 ### Optional reset from a prior run.
 #reset_demo_output_dir(repo_file("demo-strategus-cohort-method"), prompt = TRUE)
 #
+# Note: this clears only `demo-strategus-cohort-method`. If `incidenceOutputDir` points
+# at `demo-strategus-cohort-incidence`, you may still see cache prompts for artifacts in
+# that separate directory unless you reset it too.
 # If you already ran `scripts/test_strategus_incidence_plus_keeper.R`, this shell can
 # reuse cached target and outcome artifacts from `demo-strategus-cohort-incidence`.
 slashOhdsiStrategusAssistant::runStrategusCohortMethodsShell(
@@ -31,21 +34,21 @@ slashOhdsiStrategusAssistant::runStrategusCohortMethodsShell(
   indexDir = "data/phenotype_index_cipher_omop",
   incidenceOutputDir = "demo-strategus-cohort-incidence",
   studyIntent = paste(
-    "Compare thiazide or thiazide-like diuretic new users vs angiotensin-converting enzyme inhibitors new users for acute myocardial infarction.",
-    "Use a 365-day washout, intent-to-treat follow-up, 1:1 propensity score matching",
-    "on standardized logit with a caliper of 0.2, and a Cox model."
+    "Compare new users of GLP-1RA medications vs new users of DPP4-i medications for chronic lower respiratory disease outcomes.",
+    "Use data from 2006 to 2025. A 365-day washout, intent-to-treat with 365 days follow-up, sIPTW confounder balancing",
+    "and a Cox model to estimate time-to-event for the primary outcome."
   )
 )
 
 ## Use this to resume from cached artifacts and regenerate output scripts.
-# slashOhdsiStrategusAssistant::runStrategusCohortMethodsShell(
-#   outputDir = "demo-strategus-cohort-method",
-#   acpUrl = "http://127.0.0.1:8765",
-#   studyAgentBaseDir = repo_root,
-#   indexDir = "data/phenotype_index_cipher_omop",
-#   incidenceOutputDir = "demo-strategus-cohort-incidence",
-#   resume = TRUE,
-#   allowCache = TRUE,
-#   promptOnCache = FALSE,
-#   interactive = FALSE
-# )
+## slashOhdsiStrategusAssistant::runStrategusCohortMethodsShell(
+##   outputDir = "demo-strategus-cohort-method",
+##   acpUrl = "http://127.0.0.1:8765",
+##   studyAgentBaseDir = repo_root,
+##   indexDir = "data/phenotype_index_cipher_omop",
+##   incidenceOutputDir = "demo-strategus-cohort-incidence",
+##   resume = TRUE,
+##   allowCache = TRUE,
+##   promptOnCache = FALSE,
+##   interactive = FALSE
+## )
diff --git a/scripts/test_strategus_incidence_plus_keeper.R b/scripts/test_strategus_incidence_plus_keeper.R
@@ -16,6 +16,9 @@ load_study_agent_r_packages(include_strategus = TRUE)
 
 ## Optional reset from a prior run.
 reset_demo_output_dir(repo_file("demo-strategus-cohort-incidence"), prompt = TRUE)
+## Note: this clears only `demo-strategus-cohort-incidence`. This script does not read
+## from a second shared output directory, so any cache prompts should come only from
+## artifacts still present under that same incidence demo directory.
 
 Sys.setenv(ACP_TIMEOUT = "280")
 invisible(connect_study_agent_acp())

diff --git a/tests/test_keeper_dialogue_integration_static.py b/tests/test_keeper_dialogue_integration_static.py
@@ -13,6 +13,10 @@ def test_keeper_stage_labels_exist_for_both_shells() -> None:
 
     assert 'keeper_concept_set_generation = if (nzchar(role_label)) paste0(role_label, "Keeper concept-set generation") else "Keeper concept-set generation"' in source
     assert 'keeper_case_review = if (nzchar(role_label)) paste0(role_label, "Keeper case review") else "Keeper case review"' in source
+    assert 'keeper_concept_set_generation_before = "keeper_concept_set_generation"' in source
+    assert 'keeper_concept_set_generation_after = "keeper_concept_set_generation"' in source
+    assert 'keeper_case_review_before = "keeper_case_review"' in source
+    assert 'keeper_case_review_after = "keeper_case_review"' in source
 
 
 def test_keeper_helper_emits_metadata_only_stage_callbacks() -> None:
@@ -23,24 +27,40 @@ def test_keeper_helper_emits_metadata_only_stage_callbacks() -> None:
     assert "Sys.setenv(ACP_TIMEOUT = as.character(acp_timeout_seconds))" in source
     assert "acp_timeout_seconds = as.numeric(acp_timeout_seconds)" in source
     assert "stage_callback = NULL" in source
+    assert "stage_gate = NULL" in source
     assert "overwrite_approved_concept_sets = FALSE" in source
     assert "resume_reviews = TRUE" in source
     assert "review_row_selection = NULL" in source
     assert "parse_row_selection <- function(selection, total_rows, default_limit)" in source
     assert 'tolower(selection_text) %in% c("all", "*")' in source
-    assert 'selected_row_indices <- parse_row_selection(review_row_selection, length(row_records), review_row_limit)' in source
+    assert 'selected_row_indices <- parse_row_selection(current_review_row_selection, length(row_records), current_review_row_limit)' in source
     assert 'pending_row_indices <- selected_row_indices[!selected_row_indices %in% reviewed_indices]' in source
     assert 'approved_source <- "overwritten_from_generated"' in source
     assert 'approved_concept_sets_source = approved_source' in source
     assert 'selected_row_indices = as.list(selected_row_indices)' in source
     assert 'pending_row_indices = as.list(pending_row_indices)' in source
     assert "keeper_row = keeper_row" in source
     assert "emit_stage(" in source
+    assert 'payload_error_message <- function(payload)' in source
+    assert 'append_workflow_error <- function(errors,' in source
+    assert 'clear_workflow_errors <- function(errors,' in source
+    assert 'workflow_status <- if (length(workflow_errors)) "error" else "ok"' in source
+    assert 'normalize_stage_gate_result <- function(result)' in source
+    assert 'invoke_stage_gate <- function(step, role = "", context = list())' in source
+    assert 'apply_domain_gate_updates <- function(current_candidate_limit, current_min_record_count, updates)' in source
+    assert 'apply_review_gate_updates <- function(current_review_row_limit,' in source
+    assert 'keeper_concept_set_generation_before' in source
+    assert 'keeper_concept_set_generation_after' in source
+    assert 'keeper_case_review_before' in source
+    assert 'keeper_case_review_after' in source
+    assert 'domain_runs = domain_runs' in source
+    assert 'review_row_selection = if (is.null(current_review_row_selection)) NULL else as.character(current_review_row_selection)' in source
+    assert 'error_count = length(workflow_errors)' in source
 
 
 def _assert_shell_keeper_controls(source: str) -> None:
     assert 'keeper_acp_timeout_seconds <- as.numeric(Sys.getenv("ACP_TIMEOUT", "300"))' in source
-    assert 'prompt_yesno("Run ACP-based Keeper review now?", default = FALSE)' in source
+    assert 'Keeper review roles [outcome]: ' in source
     assert 'readline_with_dialogue("Keeper review roles [outcome]: ")' in source
     assert 'prompt_yesno("Reuse existing Keeper generated artifacts?", default = TRUE)' in source
     assert 'prompt_yesno("Replace approved concept sets with current generated output?", default = FALSE)' in source
@@ -53,11 +73,30 @@ def _assert_shell_keeper_controls(source: str) -> None:
     assert 'reuse_rows = keeper_reuse_generated_artifacts' in source
     assert 'resume_reviews = keeper_resume_reviews' in source
     assert 'review_row_selection = keeper_review_row_selection' in source
+    assert 'candidate_limit = keeper_candidate_limit' in source
+    assert 'sample_size = keeper_sample_size' in source
+    assert 'review_row_limit = keeper_review_row_limit' in source
+    assert 'stage_gate = keeper_stage_gate' in source
+    assert 'keeper_concept_set_generation_before' in source
+    assert 'keeper_concept_set_generation_after' in source
+    assert 'keeper_case_review_before' in source
+    assert 'keeper_case_review_after' in source
     assert 'state$keeper_acp_timeout_seconds <- as.numeric(keeper_acp_timeout_seconds)' in source
+    assert 'has_keeper_generated_artifacts <- dir.exists(keeper_generated_dir) &&' in source
+    assert 'if (has_keeper_generated_artifacts || has_keeper_rows_artifacts) {' in source
+    assert 'if (has_keeper_review_artifacts) {' in source
+    assert 'state$keeper_candidate_limit <- as.integer(keeper_candidate_limit)' in source
+    assert 'state$keeper_sample_size <- as.integer(keeper_sample_size)' in source
+    assert 'state$keeper_review_row_limit <- as.integer(keeper_review_row_limit)' in source
     assert 'state$keeper_reuse_generated_artifacts <- isTRUE(keeper_reuse_generated_artifacts)' in source
     assert 'state$keeper_overwrite_approved_concept_sets <- isTRUE(keeper_overwrite_approved_concept_sets)' in source
     assert 'state$keeper_resume_reviews <- isTRUE(keeper_resume_reviews)' in source
     assert 'state$keeper_review_row_selection <- keeper_review_row_selection' in source
+    assert 'else if (identical(keeper_review_result$status %||% "ok", "error")) {' in source
+    assert 'Keeper review encountered %s ACP error(s).' in source
+    assert 'error_count <- as.integer(keeper_review_result$error_count %||% 0L)' in source
+    assert 'state$keeper_review_status <- if (inherits(keeper_review_result, "error")) "error" else as.character(keeper_review_result$status %||% if (isTRUE(keeper_review_ran)) "ok" else "not_run")' in source
+    assert 'state$keeper_review_error_count <- if (inherits(keeper_review_result, "error")) 1L else as.integer(keeper_review_result$error_count %||% 0L)' in source
 
 
 def test_cohort_method_shell_offers_inline_keeper_phase() -> None: