From 75637e7348f68db99ba34dab9bd6943b15053867 Mon Sep 17 00:00:00 2001 From: jql Date: Sun, 22 Mar 2026 16:36:52 +0800 Subject: [PATCH] Harden autonomous-skill runner for resume and long-running sessions --- skills/autonomous-skill/SKILL.md | 28 +- .../autonomous-skill/scripts/run-session.sh | 738 +++++++++++++----- .../templates/executor-prompt.md | 8 +- .../tests/run-session-regression.sh | 476 +++++++++++ 4 files changed, 1031 insertions(+), 219 deletions(-) create mode 100755 skills/autonomous-skill/tests/run-session-regression.sh diff --git a/skills/autonomous-skill/SKILL.md b/skills/autonomous-skill/SKILL.md index 8b69d3d..86b54ff 100644 --- a/skills/autonomous-skill/SKILL.md +++ b/skills/autonomous-skill/SKILL.md @@ -13,23 +13,25 @@ Use the `run-session.sh` script to manage autonomous tasks: ```bash # Start a new autonomous task -~/.codex/skills/autonomous-skill/scripts/run-session.sh "Build a REST API for todo app" +bash ~/.codex/skills/autonomous-skill/scripts/run-session.sh "Build a REST API for todo app" # Continue an existing task -~/.codex/skills/autonomous-skill/scripts/run-session.sh --task-name build-rest-api-todo --continue +bash ~/.codex/skills/autonomous-skill/scripts/run-session.sh --task-name build-rest-api-todo --continue # List all tasks and their progress -~/.codex/skills/autonomous-skill/scripts/run-session.sh --list +bash ~/.codex/skills/autonomous-skill/scripts/run-session.sh --list # Show help -~/.codex/skills/autonomous-skill/scripts/run-session.sh --help +bash ~/.codex/skills/autonomous-skill/scripts/run-session.sh --help ``` The runner intentionally leaves `--model` unset so Codex uses the active `config.toml` or selected profile model by default. 
## Directory Structure -All task data is stored in `.autonomous//` under the project root: +All task data is stored in `.autonomous//` under the workspace root: +- If the current directory is inside a git repo, the git top-level directory is used +- Otherwise, the current working directory is used ```text project-root/ @@ -105,7 +107,7 @@ User Request → Generate Task Name → Create .autonomous// → Exec ### Example 1: Start New Task ```bash -~/.codex/skills/autonomous-skill/scripts/run-session.sh "Build a REST API for todo app" +bash ~/.codex/skills/autonomous-skill/scripts/run-session.sh "Build a REST API for todo app" ``` Output: @@ -135,20 +137,20 @@ Continuing in 3 seconds... (Press Ctrl+C to pause) ### Example 2: Continue Existing Task ```bash -~/.codex/skills/autonomous-skill/scripts/run-session.sh --task-name build-rest-api-todo --continue +bash ~/.codex/skills/autonomous-skill/scripts/run-session.sh --task-name build-rest-api-todo --continue ``` ### Example 3: Resume with Session Context ```bash # Resume the Codex session (preserves conversation context) -~/.codex/skills/autonomous-skill/scripts/run-session.sh --task-name build-rest-api-todo --continue --resume-last +bash ~/.codex/skills/autonomous-skill/scripts/run-session.sh --task-name build-rest-api-todo --continue --resume-last ``` ### Example 4: List All Tasks ```bash -~/.codex/skills/autonomous-skill/scripts/run-session.sh --list +bash ~/.codex/skills/autonomous-skill/scripts/run-session.sh --list ``` Output: @@ -165,7 +167,7 @@ Output: ```bash # Enable network access for tasks that need API calls -~/.codex/skills/autonomous-skill/scripts/run-session.sh --network "Fetch data from GitHub API and analyze" +bash ~/.codex/skills/autonomous-skill/scripts/run-session.sh --network "Fetch data from GitHub API and analyze" ``` ## Key Files @@ -182,14 +184,14 @@ For each task in `.autonomous//`: ## Important Notes 1. **Task Isolation**: Each task has its own directory, no conflicts -2. 
**Task Naming**: Auto-generated from description (lowercase, hyphens, max 30 chars) +2. **Task Naming**: Auto-generated from description (lowercase, hyphens, max 30 chars). Non-ASCII descriptions fall back to `task-YYYYMMDD-HHMMSS`. 3. **Task List is Sacred**: Never delete or modify task descriptions, only mark `[x]` 4. **One Task at a Time per Session**: Focus on completing tasks thoroughly 5. **Auto-Continue**: Sessions auto-continue with 3s delay; Ctrl+C to pause -6. **Session Resumption**: Use `--resume-last` to preserve Codex conversation context +6. **Session Resumption**: Use `--resume-last` with `--continue` (or on its own to imply continue mode) to preserve Codex conversation context 7. **Configured Model**: The runner does not pass `--model`; it uses the active Codex config/profile model 8. **Network Mode**: `--network` switches the sandbox override to `danger-full-access` while keeping approval policy non-interactive -9. **Git Hygiene**: Consider adding `.autonomous/` to `.gitignore` to avoid committing logs +9. **Git Hygiene**: Avoid `git add .` blindly. 
Prefer explicitly adding project files plus `task_list.md` / `progress.md`, and ignore `session.log` / `session.id` if you do not want them versioned ## Codex CLI Reference diff --git a/skills/autonomous-skill/scripts/run-session.sh b/skills/autonomous-skill/scripts/run-session.sh index 8e1bf4e..6d5c367 100755 --- a/skills/autonomous-skill/scripts/run-session.sh +++ b/skills/autonomous-skill/scripts/run-session.sh @@ -3,39 +3,33 @@ # Autonomous Skill - Session Runner # Executes Codex in non-interactive mode with auto-continuation # -# Usage: -# ./run-session.sh "task description" -# ./run-session.sh --task-name --continue -# ./run-session.sh --list -# ./run-session.sh --help -# set -euo pipefail -# Configuration AUTO_CONTINUE_DELAY=3 CURRENT_TASK_NAME="" +CURRENT_TASK_DIR="" +CURRENT_LOCK_DIR="" +WORKSPACE_ROOT="" +AUTONOMOUS_DIR="" +EXECUTION_CWD="" CODEX_ARGS=() +LAST_CODEX_EXIT=0 +LAST_SESSION_ID="" -# Use CODEX_PLUGIN_ROOT or fallback to script directory if [ -n "${CODEX_PLUGIN_ROOT:-}" ]; then SKILL_DIR="${CODEX_PLUGIN_ROOT}/skills/autonomous-skill" else SKILL_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." 
&& pwd)" fi -# Task directory base (in project root) -AUTONOMOUS_DIR=".autonomous" - -# Colors for output RED='\033[0;31m' GREEN='\033[0;32m' YELLOW='\033[1;33m' BLUE='\033[0;34m' CYAN='\033[0;36m' -NC='\033[0m' # No Color +NC='\033[0m' -# Print colored output print_header() { echo -e "${BLUE}==========================================${NC}" echo -e "${BLUE} $1${NC}" @@ -58,96 +52,157 @@ print_info() { echo -e "${CYAN}ℹ $1${NC}" } -# Handle Ctrl+C gracefully +release_task_lock() { + if [ -n "${CURRENT_LOCK_DIR:-}" ] && [ -d "$CURRENT_LOCK_DIR" ]; then + local owner_pid="" + if [ -f "$CURRENT_LOCK_DIR/pid" ]; then + owner_pid="$(cat "$CURRENT_LOCK_DIR/pid" 2>/dev/null || true)" + fi + if [ -z "$owner_pid" ] || [ "$owner_pid" = "$$" ]; then + rm -rf "$CURRENT_LOCK_DIR" + fi + fi +} + +cleanup() { + release_task_lock +} + handle_interrupt() { echo "" + release_task_lock if [ -n "${CURRENT_TASK_NAME:-}" ]; then - print_warning "Interrupted. Progress saved in $AUTONOMOUS_DIR/$CURRENT_TASK_NAME/" + print_warning "Interrupted. Progress saved in $CURRENT_TASK_DIR/" echo "Run again to continue: $0 --task-name $CURRENT_TASK_NAME --continue" else print_warning "Interrupted." fi + trap - EXIT exit 130 } -# Show help show_help() { echo "Autonomous Skill - Session Runner (Codex)" echo "" echo "Usage:" - echo " $0 \"task description\" Start new task (auto-generates name)" - echo " $0 --task-name --continue Continue specific task" - echo " $0 --list List all tasks" - echo " $0 --help Show this help" + echo " bash $0 \"task description\" Start new task (auto-generates name)" + echo " bash $0 --task-name --continue Continue specific task" + echo " bash $0 --list List all tasks" + echo " bash $0 --help Show this help" echo "" echo "Options:" - echo " --task-name Specify task name explicitly" + echo " --task-name Specify task name explicitly (must start with A-Z a-z 0-9; safe chars: . 
_ -)" echo " --continue, -c Continue existing task" echo " --no-auto-continue Don't auto-continue after session" echo " --max-sessions N Limit to N sessions" echo " --list List all existing tasks" - echo " --resume-last Resume the most recent Codex session" + echo " --resume-last Resume the saved Codex session for the selected task" echo " --network Enable danger-full-access sandbox for tasks that need broader shell access" echo "" echo "Examples:" - echo " $0 \"Build a REST API for todo app\"" - echo " $0 --task-name build-rest-api --continue" - echo " $0 --task-name build-rest-api --continue --resume-last" - echo " $0 --list" + echo " bash $0 \"Build a REST API for todo app\"" + echo " bash $0 --task-name build-rest-api --continue" + echo " bash $0 --task-name build-rest-api --continue --resume-last" + echo " bash $0 --list" echo "" - echo "Task Directory: $AUTONOMOUS_DIR//" + echo "Task Directory: /.autonomous/ (git root if available, else current directory)" + echo "Execution Directory: current working directory where the runner was launched" echo "Skill Directory: $SKILL_DIR" echo "Model Selection: Uses the active Codex config/profile model (no --model override)" echo "" } -# Generate task name from description +detect_workspace_root() { + if command -v git >/dev/null 2>&1; then + local git_root="" + git_root="$(git rev-parse --show-toplevel 2>/dev/null || true)" + if [ -n "$git_root" ]; then + echo "$git_root" + return + fi + fi + pwd -P +} + +resolve_workspace_paths() { + EXECUTION_CWD="$(pwd -P)" + WORKSPACE_ROOT="$(detect_workspace_root)" + AUTONOMOUS_DIR="$WORKSPACE_ROOT/.autonomous" +} + generate_task_name() { local desc="${1:-}" - # Convert to lowercase, replace non-alphanumeric with hyphens, collapse multiple hyphens, trim - local result - result=$(echo "$desc" | tr '[:upper:]' '[:lower:]' | sed 's/[^a-z0-9]/-/g' | sed 's/--*/-/g' | cut -c1-30 | sed 's/^-//' | sed 's/-$//') + local result="" + + result="$(printf '%s' "$desc" | tr '[:upper:]' 
'[:lower:]' | sed 's/[^a-z0-9]/-/g' | sed 's/--*/-/g' | cut -c1-30 | sed 's/^-//' | sed 's/-$//')" - # If result is empty (description was non-ASCII or only special chars), use timestamp fallback if [ -z "$result" ]; then result="task-$(date +%Y%m%d-%H%M%S)" - print_warning "Non-alphanumeric description detected, using generated name: $result" + print_warning "Non-alphanumeric description detected, using generated name: $result" >&2 fi - echo "$result" + + printf '%s\n' "$result" } -# Validate task name (security: prevent path traversal) validate_task_name() { local name="$1" - # Reject if contains path traversal attempts or invalid characters - if [[ "$name" == *".."* ]] || [[ "$name" == *"/"* ]] || [[ "$name" == *"\\"* ]]; then - print_error "Invalid task name: '$name' (contains path traversal characters)" - return 1 - fi - # Reject if empty + if [ -z "$name" ]; then print_error "Task name cannot be empty" return 1 fi - # Reject if starts with hyphen (could be confused with options) + if [[ "$name" == -* ]]; then print_error "Task name cannot start with a hyphen" return 1 fi + + if [[ "$name" == .* ]]; then + print_error "Task name cannot start with a dot" + return 1 + fi + + if [[ "$name" == *".."* ]] || [[ "$name" == *"/"* ]] || [[ "$name" == *"\\"* ]]; then + print_error "Invalid task name: '$name' (contains path traversal characters)" + return 1 + fi + + if [[ ! "$name" =~ ^[A-Za-z0-9._-]+$ ]]; then + print_error "Invalid task name: '$name' (allowed characters: letters, numbers, dot, underscore, hyphen)" + return 1 + fi + return 0 } -# Verify required commands exist +require_option_value() { + local option="$1" + local value="${2:-}" + + if [ -z "$value" ] || [[ "$value" == --* ]]; then + print_error "Option '$option' requires a value" + exit 1 + fi +} + +validate_non_negative_integer() { + local name="$1" + local value="$2" + + if [[ ! 
"$value" =~ ^[0-9]+$ ]]; then + print_error "$name must be a non-negative integer" + exit 1 + fi +} + check_dependencies() { - if ! command -v codex &> /dev/null; then + if ! command -v codex >/dev/null 2>&1; then print_error "Required command 'codex' not found" echo "Please install Codex CLI: https://github.com/openai/codex" exit 1 fi } -# Build Codex CLI args for unattended execution. -# Intentionally do not pass --model so Codex uses the active config/profile model. build_codex_args() { local enable_network="$1" local sandbox_mode="workspace-write" @@ -161,57 +216,187 @@ build_codex_args() { -c "sandbox_mode=\"$sandbox_mode\"" --skip-git-repo-check --json + -C "$EXECUTION_CWD" ) } -# List all tasks -list_tasks() { - print_header "AUTONOMOUS TASKS" +ensure_unique_task_name() { + local base_name="$1" + local candidate="$base_name" + local suffix=2 + + while [ -e "$AUTONOMOUS_DIR/$candidate" ]; do + local suffix_part="-$suffix" + local max_base_len=$((30 - ${#suffix_part})) + if [ "$max_base_len" -lt 1 ]; then + max_base_len=1 + fi + candidate="$(printf '%s' "$base_name" | cut -c1-"$max_base_len")$suffix_part" + suffix=$((suffix + 1)) + done + + printf '%s\n' "$candidate" +} + +get_path_mtime() { + local path="$1" + + if [ ! -e "$path" ]; then + echo "0" + return + fi + + if stat -f '%m' "$path" >/dev/null 2>&1; then + stat -f '%m' "$path" + else + stat -c '%Y' "$path" + fi +} + +get_task_activity_mtime() { + local task_dir="$1" + local latest=0 + local current=0 + local candidate="" + + for candidate in \ + "$task_dir/session.log" \ + "$task_dir/progress.md" \ + "$task_dir/task_list.md" \ + "$task_dir/session.id" \ + "$task_dir" + do + current="$(get_path_mtime "$candidate")" + if [ "$current" -gt "$latest" ]; then + latest="$current" + fi + done + + echo "$latest" +} + +find_recent_task_name() { + local latest_name="" + local latest_mtime=0 + local task_dir="" + local task_mtime=0 if [ ! -d "$AUTONOMOUS_DIR" ]; then - print_warning "No tasks found. 
Directory $AUTONOMOUS_DIR does not exist." echo "" return fi - # Check if directory is empty (no subdirectories) - local dir_count - dir_count=$(find "$AUTONOMOUS_DIR" -mindepth 1 -maxdepth 1 -type d 2>/dev/null | wc -l) - if [ "$dir_count" -eq 0 ]; then - print_warning "No tasks found in $AUTONOMOUS_DIR/" + for task_dir in "$AUTONOMOUS_DIR"/*; do + [ -d "$task_dir" ] || continue + [ -f "$task_dir/task_list.md" ] || continue + + task_mtime="$(get_task_activity_mtime "$task_dir")" + if [ "$task_mtime" -gt "$latest_mtime" ]; then + latest_mtime="$task_mtime" + latest_name="$(basename "$task_dir")" + fi + done + + echo "$latest_name" +} + +count_total_tasks() { + local task_list="$1" + + if [ ! -f "$task_list" ]; then + echo "0" + return + fi + + awk ' + /^## Tasks$/ {in_tasks=1; next} + /^## / && in_tasks {in_tasks=0} + in_tasks && /^- \[[ x]\] Task [0-9]+:/ {count++} + END {print count+0} + ' "$task_list" +} + +count_completed_tasks() { + local task_list="$1" + + if [ ! -f "$task_list" ]; then + echo "0" + return + fi + + awk ' + /^## Tasks$/ {in_tasks=1; next} + /^## / && in_tasks {in_tasks=0} + in_tasks && /^- \[x\] Task [0-9]+:/ {count++} + END {print count+0} + ' "$task_list" +} + +task_list_has_invalid_entries() { + local task_list="$1" + + awk ' + /^## Tasks$/ {in_tasks=1; next} + /^## / && in_tasks {in_tasks=0} + in_tasks && /^- \[/ { + if ($0 !~ /^- \[[ x]\] Task [0-9]+:/) { + invalid=1 + next + } + + line=$0 + sub(/^.*Task /, "", line) + sub(/:.*/, "", line) + task_num=line+0 + + if (seen && task_num <= previous) { + invalid=1 + next + } + + previous=task_num + seen=1 + } + END { exit invalid ? 0 : 1 } + ' "$task_list" +} + +list_tasks() { + print_header "AUTONOMOUS TASKS" + + if [ ! -d "$AUTONOMOUS_DIR" ]; then + print_warning "No tasks found. Directory $AUTONOMOUS_DIR does not exist." 
echo "" return fi local found=0 - for task_dir in "$AUTONOMOUS_DIR"/*/; do - # Skip if glob didn't match (empty directory case) + local task_dir="" + + for task_dir in "$AUTONOMOUS_DIR"/*; do [ -d "$task_dir" ] || continue - local task_name - task_name=$(basename "$task_dir") - local task_list="$task_dir/task_list.md" + local task_name="" + local task_list="" + local total=0 + local done_count=0 + local percent=0 + local session_info="" + + task_name="$(basename "$task_dir")" + task_list="$task_dir/task_list.md" if [ -f "$task_list" ]; then - local total - local done_count - total=$(grep -c '^\- \[' "$task_list" 2>/dev/null | tr -d '\n' || echo "0") - done_count=$(grep -c '^\- \[x\]' "$task_list" 2>/dev/null | tr -d '\n' || echo "0") - # Ensure we have valid numbers - total=${total:-0} - done_count=${done_count:-0} - - # Safe percent calculation (avoid divide by zero) - local percent=0 + total="$(count_total_tasks "$task_list")" + done_count="$(count_completed_tasks "$task_list")" + if [ "$total" -gt 0 ]; then percent=$((done_count * 100 / total)) fi - # Check for session ID - local session_info="" if [ -f "$task_dir/session.id" ]; then - local sid - sid=$(cat "$task_dir/session.id" | head -c 8) + local sid="" + sid="$(head -c 8 < "$task_dir/session.id")" session_info=" [session: ${sid}...]" fi @@ -220,88 +405,196 @@ list_tasks() { else echo -e " ${YELLOW}○${NC} $task_name ($done_count/$total - $percent%)$session_info" fi - found=$((found + 1)) else echo -e " ${RED}?${NC} $task_name (no task_list.md)" - found=$((found + 1)) fi + + found=$((found + 1)) done if [ "$found" -eq 0 ]; then - print_warning "No valid tasks found in $AUTONOMOUS_DIR/" + print_warning "No tasks found in $AUTONOMOUS_DIR/" fi echo "" } -# Check if task exists task_exists() { local task_name="$1" [ -f "$AUTONOMOUS_DIR/$task_name/task_list.md" ] } -# Get task directory +task_artifacts_exist() { + local task_dir="$1" + [ -e "$task_dir/task_list.md" ] || [ -e "$task_dir/progress.md" ] || [ -e 
"$task_dir/session.id" ] || [ -e "$task_dir/session.log" ] +} + +validate_task_state() { + local task_dir="$1" + local context="$2" + local task_list="$task_dir/task_list.md" + local progress_file="$task_dir/progress.md" + local total=0 + local done=0 + + if [ ! -f "$task_list" ]; then + print_error "$context is missing $task_list" + return 1 + fi + + if [ ! -f "$progress_file" ]; then + print_error "$context is missing $progress_file" + return 1 + fi + + total="$(count_total_tasks "$task_list")" + done="$(count_completed_tasks "$task_list")" + + if [ "$total" -le 0 ]; then + print_error "$context has an invalid task_list.md (no parseable tasks)" + return 1 + fi + + if [ "$done" -gt "$total" ]; then + print_error "$context has an invalid task_list.md (completed tasks exceed total tasks)" + return 1 + fi + + if task_list_has_invalid_entries "$task_list"; then + print_error "$context has an invalid task_list.md (contains invalid or out-of-order task entries)" + return 1 + fi + + return 0 +} + get_task_dir() { local task_name="$1" echo "$AUTONOMOUS_DIR/$task_name" } -# Get progress from task_list.md get_progress() { local task_dir="$1" - if [ -f "$task_dir/task_list.md" ]; then - local total - local done - total=$(grep -c '^\- \[' "$task_dir/task_list.md" 2>/dev/null | tr -d '\n' || echo "0") - done=$(grep -c '^\- \[x\]' "$task_dir/task_list.md" 2>/dev/null | tr -d '\n' || echo "0") - # Ensure we have valid numbers - total=${total:-0} - done=${done:-0} - echo "$done/$total" - else - echo "0/0" - fi + local task_list="$task_dir/task_list.md" + + echo "$(count_completed_tasks "$task_list")/$(count_total_tasks "$task_list")" } -# Check if all tasks are complete is_complete() { local task_dir="$1" - if [ -f "$task_dir/task_list.md" ]; then - local total - local done - total=$(grep -c '^\- \[' "$task_dir/task_list.md" 2>/dev/null | tr -d '\n' || echo "0") - done=$(grep -c '^\- \[x\]' "$task_dir/task_list.md" 2>/dev/null | tr -d '\n' || echo "0") - # Ensure we have valid 
numbers - total=${total:-0} - done=${done:-0} - if [ "$done" -eq "$total" ] && [ "$total" -gt 0 ]; then - return 0 # complete - fi + local task_list="$task_dir/task_list.md" + local total=0 + local done=0 + + total="$(count_total_tasks "$task_list")" + done="$(count_completed_tasks "$task_list")" + + if [ "$total" -gt 0 ] && [ "$done" -eq "$total" ]; then + return 0 fi - return 1 # not complete + + return 1 } -# Extract session ID from mixed Codex output (JSON events plus possible warnings) extract_session_id() { local log_file="$1" - # Prefer thread_id from thread.started; fall back to any thread_id or session_id local id="" - id=$(grep '"type":"thread.started"' "$log_file" 2>/dev/null | tail -n 1 | sed -n 's/.*"thread_id":"\([^"]*\)".*/\1/p') + + id="$(grep '"type":"thread.started"' "$log_file" 2>/dev/null | tail -n 1 | sed -n 's/.*"thread_id":"\([^"]*\)".*/\1/p')" if [ -z "$id" ]; then - id=$(grep '"thread_id"' "$log_file" 2>/dev/null | tail -n 1 | sed -n 's/.*"thread_id":"\([^"]*\)".*/\1/p') + id="$(grep '"thread_id"' "$log_file" 2>/dev/null | tail -n 1 | sed -n 's/.*"thread_id":"\([^"]*\)".*/\1/p')" fi if [ -z "$id" ]; then - id=$(grep '"session_id"' "$log_file" 2>/dev/null | tail -n 1 | sed -n 's/.*"session_id":"\([^"]*\)".*/\1/p') + id="$(grep '"session_id"' "$log_file" 2>/dev/null | tail -n 1 | sed -n 's/.*"session_id":"\([^"]*\)".*/\1/p')" fi + echo "$id" } -# Run initializer session +persist_session_id() { + local task_dir="$1" + local session_id="$2" + local temp_file="$task_dir/session.id.tmp" + + if [ -z "$session_id" ]; then + return + fi + + printf '%s\n' "$session_id" > "$temp_file" + mv "$temp_file" "$task_dir/session.id" + print_info "Session ID saved: $session_id" +} + +render_template() { + local template_path="$1" + local task_dir="$2" + local template="" + + template="$(cat "$template_path")" + printf '%s' "${template//\{TASK_DIR\}/$task_dir}" +} + +run_codex_capture() { + local task_dir="$1" + shift + + local run_log="" + run_log="$(mktemp 
"$task_dir/session.run.XXXXXX.log")" + + set +e + "$@" 2>&1 | tee "$run_log" + LAST_CODEX_EXIT=${PIPESTATUS[0]} + set -e + + cat "$run_log" >> "$task_dir/session.log" + LAST_SESSION_ID="$(extract_session_id "$run_log")" + rm -f "$run_log" +} + +acquire_task_lock() { + local task_dir="$1" + local lock_dir="$task_dir/.runner.lock" + local existing_pid="" + + mkdir -p "$task_dir" + + if mkdir "$lock_dir" 2>/dev/null; then + printf '%s\n' "$$" > "$lock_dir/pid" + CURRENT_LOCK_DIR="$lock_dir" + return 0 + fi + + if [ -f "$lock_dir/pid" ]; then + existing_pid="$(cat "$lock_dir/pid" 2>/dev/null || true)" + fi + + if [ -n "$existing_pid" ] && kill -0 "$existing_pid" 2>/dev/null; then + print_error "Task '$CURRENT_TASK_NAME' is already running (pid: $existing_pid)" + return 1 + fi + + print_warning "Removing stale lock for task '$CURRENT_TASK_NAME'${existing_pid:+ (pid: $existing_pid)}" + rm -rf "$lock_dir" + + if mkdir "$lock_dir" 2>/dev/null; then + printf '%s\n' "$$" > "$lock_dir/pid" + CURRENT_LOCK_DIR="$lock_dir" + return 0 + fi + + print_error "Task '$CURRENT_TASK_NAME' is already running" + return 1 +} + run_initializer() { local task_name="$1" local task_desc="$2" local enable_network="$3" - local task_dir=$(get_task_dir "$task_name") + local task_dir="" + local init_prompt="" + local prompt="" + + task_dir="$(get_task_dir "$task_name")" print_header "INITIALIZER SESSION" echo "Task: $task_desc" @@ -309,97 +602,98 @@ run_initializer() { echo "Task Directory: $task_dir" echo "" - # Create task directory - mkdir -p "$task_dir" - - # Read initializer prompt template and substitute {TASK_DIR} placeholder - local init_prompt=$(cat "$SKILL_DIR/templates/initializer-prompt.md" | sed "s|{TASK_DIR}|$task_dir|g") - + init_prompt="$(render_template "$SKILL_DIR/templates/initializer-prompt.md" "$task_dir")" build_codex_args "$enable_network" - # Execute Codex in non-interactive mode - codex exec "${CODEX_ARGS[@]}" "Task: $task_desc + prompt="Task: $task_desc Task Name: 
$task_name Task Directory: $task_dir You are the Initializer Agent. Create task_list.md and progress.md in the $task_dir directory. All task files must be created in $task_dir/, not in the current directory. -$init_prompt" 2>&1 | tee "$task_dir/session.log" +$init_prompt" + + run_codex_capture "$task_dir" codex exec "${CODEX_ARGS[@]}" "$prompt" + persist_session_id "$task_dir" "$LAST_SESSION_ID" + + if [ "$LAST_CODEX_EXIT" -ne 0 ]; then + print_error "Initializer session failed (exit $LAST_CODEX_EXIT)" + return "$LAST_CODEX_EXIT" + fi + + if [ ! -f "$task_dir/task_list.md" ] || [ ! -f "$task_dir/progress.md" ]; then + print_error "Initializer did not create required files in $task_dir" + return 1 + fi - # Extract and save session ID for potential resumption - local session_id - session_id=$(extract_session_id "$task_dir/session.log") - if [ -n "$session_id" ]; then - echo "$session_id" > "$task_dir/session.id" - print_info "Session ID saved: $session_id" + if ! validate_task_state "$task_dir" "Initializer output for task '$task_name'"; then + return 1 fi echo "" print_success "Initializer session complete" } -# Run executor session run_executor() { local task_name="$1" local resume_last="$2" local enable_network="$3" - local task_dir=$(get_task_dir "$task_name") + local task_dir="" + local exec_prompt="" + local prompt="" + local session_id="" + local resumed=false + + task_dir="$(get_task_dir "$task_name")" print_header "EXECUTOR SESSION" echo "Task Name: $task_name" echo "Task Directory: $task_dir" echo "" - # Read current state - local task_list=$(cat "$task_dir/task_list.md" 2>/dev/null || echo "No task list found") - local progress_notes=$(cat "$task_dir/progress.md" 2>/dev/null || echo "No progress notes yet") - - # Read executor prompt template and substitute {TASK_DIR} placeholder - local exec_prompt=$(cat "$SKILL_DIR/templates/executor-prompt.md" | sed "s|{TASK_DIR}|$task_dir|g") - + exec_prompt="$(render_template "$SKILL_DIR/templates/executor-prompt.md" 
"$task_dir")" build_codex_args "$enable_network" - # Build the prompt - local prompt="Continue working on the task. + prompt="Continue working on the task. Task Name: $task_name Task Directory: $task_dir +Current Progress: $(get_progress "$task_dir") -You are the Executor Agent. Complete tasks and update files in the $task_dir directory. All task files are in $task_dir/, not in the current directory. - -Current task_list.md: -$task_list - -Previous progress notes: -$progress_notes +You are the Executor Agent. Read $task_dir/task_list.md and $task_dir/progress.md directly before making changes. Do not rely on stale summaries. $exec_prompt" - # Check if we should resume the previous session if [ "$resume_last" = true ] && [ -f "$task_dir/session.id" ]; then - local session_id - session_id=$(cat "$task_dir/session.id") + session_id="$(cat "$task_dir/session.id")" print_info "Resuming session: $session_id" + run_codex_capture "$task_dir" codex exec resume "${CODEX_ARGS[@]}" "$session_id" "$prompt" - # Resume the previous session with new instructions - codex exec resume "${CODEX_ARGS[@]}" "$session_id" "$prompt" 2>&1 | tee -a "$task_dir/session.log" - else - # Start a new session - codex exec "${CODEX_ARGS[@]}" "$prompt" 2>&1 | tee -a "$task_dir/session.log" - - # Save session ID - local session_id - session_id=$(extract_session_id "$task_dir/session.log") - if [ -n "$session_id" ]; then - echo "$session_id" > "$task_dir/session.id" - print_info "Session ID saved: $session_id" + if [ "$LAST_CODEX_EXIT" -eq 0 ]; then + resumed=true + else + print_warning "Resume failed for task '$task_name' (exit $LAST_CODEX_EXIT). Falling back to a new session." >&2 fi fi + if [ "$resumed" = false ]; then + run_codex_capture "$task_dir" codex exec "${CODEX_ARGS[@]}" "$prompt" + fi + + persist_session_id "$task_dir" "$LAST_SESSION_ID" + + if [ "$LAST_CODEX_EXIT" -ne 0 ]; then + print_error "Executor session failed (exit $LAST_CODEX_EXIT)" + return "$LAST_CODEX_EXIT" + fi + + if ! 
validate_task_state "$task_dir" "Executor output for task '$task_name'"; then + return 1 + fi + echo "" print_success "Executor session complete" } -# Main execution loop main() { local task_desc="" local task_name="" @@ -409,20 +703,24 @@ main() { local continue_mode=false local resume_last=false local enable_network=false + local show_help_flag=false + local list_flag=false + local task_name_generated=false + local task_dir="" - # Parse arguments while [[ $# -gt 0 ]]; do - case $1 in + case "$1" in --help|-h) - show_help - exit 0 + show_help_flag=true + shift ;; --list|-l) - list_tasks - exit 0 + list_flag=true + shift ;; --task-name|-n) - task_name="${2:-}" + require_option_value "$1" "${2:-}" + task_name="$2" shift 2 ;; --continue|-c) @@ -434,11 +732,13 @@ main() { shift ;; --max-sessions) - max_sessions="${2:-0}" + require_option_value "$1" "${2:-}" + max_sessions="$2" shift 2 ;; --resume-last) resume_last=true + continue_mode=true shift ;; --network) @@ -446,29 +746,42 @@ main() { shift ;; *) + if [ -n "$task_desc" ]; then + print_error "Unexpected extra argument: $1" + exit 1 + fi task_desc="$1" shift ;; esac done - # Determine task name + validate_non_negative_integer "--max-sessions" "$max_sessions" + resolve_workspace_paths + + if [ "$show_help_flag" = true ]; then + show_help + exit 0 + fi + + if [ "$list_flag" = true ]; then + list_tasks + exit 0 + fi + if [ -z "$task_name" ] && [ -n "$task_desc" ]; then - task_name=$(generate_task_name "$task_desc") + task_name="$(generate_task_name "$task_desc")" + task_name_generated=true print_info "Generated task name: $task_name" fi - # Validate if [ -z "$task_name" ]; then if [ "$continue_mode" = true ]; then - # Try to find most recent task - if [ -d "$AUTONOMOUS_DIR" ]; then - task_name=$(ls -t "$AUTONOMOUS_DIR" 2>/dev/null | head -1) || true - fi + task_name="$(find_recent_task_name)" if [ -z "$task_name" ]; then print_error "No task name provided and no existing tasks found" - echo "Usage: $0 \"Your task 
description\"" - echo " $0 --task-name --continue" + echo "Usage: bash $0 \"Your task description\"" + echo " bash $0 --task-name --continue" exit 1 fi print_info "Continuing most recent task: $task_name" @@ -479,54 +792,79 @@ main() { fi fi - # Security: Validate task name to prevent path traversal if ! validate_task_name "$task_name"; then exit 1 fi - # Check that codex command is available before starting sessions check_dependencies - local task_dir - task_dir=$(get_task_dir "$task_name") + mkdir -p "$AUTONOMOUS_DIR" + + if [ "$task_name_generated" = true ] && [ "$continue_mode" = false ]; then + local unique_task_name="" + unique_task_name="$(ensure_unique_task_name "$task_name")" + if [ "$unique_task_name" != "$task_name" ]; then + print_warning "Task name '$task_name' already exists, using unique name: $unique_task_name" + task_name="$unique_task_name" + fi + fi + + task_dir="$(get_task_dir "$task_name")" CURRENT_TASK_NAME="$task_name" + CURRENT_TASK_DIR="$task_dir" + + if ! acquire_task_lock "$task_dir"; then + exit 1 + fi if [ "$enable_network" = true ]; then print_warning "Network mode uses sandbox_mode=danger-full-access with approval_policy=never. Use only in an isolated environment." fi - # Main loop while true; do echo "" print_header "SESSION $session_num - $task_name" - # Show current progress if task_exists "$task_name"; then + if ! validate_task_state "$task_dir" "Existing task '$task_name'"; then + exit 1 + fi echo "Progress: $(get_progress "$task_dir")" echo "" + + if is_complete "$task_dir"; then + echo "" + print_success "ALL TASKS COMPLETED!" 
+ echo "" + echo "Task directory: $task_dir" + echo "Final task list:" + cat "$task_dir/task_list.md" + exit 0 + fi fi - # Determine which agent to run if task_exists "$task_name"; then - # Task list exists - run executor run_executor "$task_name" "$resume_last" "$enable_network" - # Only resume on first iteration if requested - resume_last=false else - # No task list - run initializer + if task_artifacts_exist "$task_dir"; then + print_error "Task '$task_name' is in an invalid state. Ensure task_list.md and progress.md both exist and task_list.md still contains valid Task N entries." + exit 1 + fi if [ -z "$task_desc" ]; then print_error "Task '$task_name' not found and no description provided" - echo "Provide a task description to initialize: $0 \"Your task description\"" + echo "Provide a task description to initialize: bash $0 \"Your task description\"" exit 1 fi run_initializer "$task_name" "$task_desc" "$enable_network" fi - # Show progress after session + if ! validate_task_state "$task_dir" "Task '$task_name' state after session $session_num"; then + exit 1 + fi + echo "" echo "=== Progress: $(get_progress "$task_dir") ===" - # Check completion if is_complete "$task_dir"; then echo "" print_success "ALL TASKS COMPLETED!" @@ -537,19 +875,16 @@ main() { exit 0 fi - # Check max sessions - if [ $max_sessions -gt 0 ] && [ $session_num -ge $max_sessions ]; then + if [ "$max_sessions" -gt 0 ] && [ "$session_num" -ge "$max_sessions" ]; then print_warning "Reached maximum sessions ($max_sessions)" exit 0 fi - # Auto-continue logic if [ "$auto_continue" = true ]; then echo "" echo "Continuing in $AUTO_CONTINUE_DELAY seconds... (Press Ctrl+C to pause)" - - # Sleep with countdown - for i in $(seq $AUTO_CONTINUE_DELAY -1 1); do + local i=0 + for i in $(seq "$AUTO_CONTINUE_DELAY" -1 1); do echo -ne "\r$i... 
" sleep 1 done @@ -564,8 +899,7 @@ main() { done } -# Handle Ctrl+C gracefully -trap handle_interrupt INT +trap cleanup EXIT +trap handle_interrupt INT TERM -# Run main main "$@" diff --git a/skills/autonomous-skill/templates/executor-prompt.md b/skills/autonomous-skill/templates/executor-prompt.md index 2083e0b..8b0090a 100644 --- a/skills/autonomous-skill/templates/executor-prompt.md +++ b/skills/autonomous-skill/templates/executor-prompt.md @@ -32,9 +32,9 @@ git log --oneline -10 2>/dev/null || echo "No git repository" # 7. Count remaining tasks echo "Remaining tasks:" -grep -c '^\- \[ \]' {TASK_DIR}/task_list.md 2>/dev/null || echo "0" +grep -Ec '^\- \[ \] Task [0-9]+:' {TASK_DIR}/task_list.md 2>/dev/null || [ -r {TASK_DIR}/task_list.md ] || echo "0" echo "Completed tasks:" -grep -c '^\- \[x\]' {TASK_DIR}/task_list.md 2>/dev/null || echo "0" +grep -Ec '^\- \[x\] Task [0-9]+:' {TASK_DIR}/task_list.md 2>/dev/null || [ -r {TASK_DIR}/task_list.md ] || echo "0" ``` Understanding the task_list.md is critical - it contains all the work that needs to be done. @@ -57,7 +57,7 @@ The previous session may have introduced issues. Before implementing anything ne - Note them in `{TASK_DIR}/progress.md` - Fix critical issues before moving on -- Mark broken tasks back to `[ ]` in `{TASK_DIR}/task_list.md` if necessary +- Do not rewrite task lines; keep task descriptions unchanged and record any regressions in `{TASK_DIR}/progress.md` --- @@ -178,7 +178,7 @@ Add a new session entry: Make a descriptive commit: ```bash -git add . +git add {TASK_DIR}/task_list.md {TASK_DIR}/progress.md [project-files...] git commit -m "Complete Task 3: Implement user model - Added User class with CRUD operations diff --git a/skills/autonomous-skill/tests/run-session-regression.sh b/skills/autonomous-skill/tests/run-session-regression.sh new file mode 100755 index 0000000..3a484af --- /dev/null +++ b/skills/autonomous-skill/tests/run-session-regression.sh @@ -0,0 +1,476 @@ +#!/bin/bash + +set -euo pipefail + +SKILL_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." 
&& pwd)" +SCRIPT_PATH="$SKILL_DIR/scripts/run-session.sh" + +TEST_ROOT="$(mktemp -d /tmp/autonomous-skill-test-XXXXXX)" +trap 'rm -rf "$TEST_ROOT"' EXIT + +PASS_COUNT=0 +FAIL_COUNT=0 + +fail() { + echo "FAIL: $1" + FAIL_COUNT=$((FAIL_COUNT + 1)) +} + +pass() { + echo "PASS: $1" + PASS_COUNT=$((PASS_COUNT + 1)) +} + +assert_contains() { + local haystack="$1" + local needle="$2" + local message="$3" + + if [[ "$haystack" == *"$needle"* ]]; then + pass "$message" + else + fail "$message" + echo " expected to contain: $needle" + echo " actual: $haystack" + fi +} + +assert_not_contains() { + local haystack="$1" + local needle="$2" + local message="$3" + + if [[ "$haystack" == *"$needle"* ]]; then + fail "$message" + echo " unexpected content: $needle" + echo " actual: $haystack" + else + pass "$message" + fi +} + +assert_file_exists() { + local path="$1" + local message="$2" + + if [ -e "$path" ]; then + pass "$message" + else + fail "$message" + echo " missing: $path" + fi +} + +assert_file_missing() { + local path="$1" + local message="$2" + + if [ -e "$path" ]; then + fail "$message" + echo " unexpected file: $path" + else + pass "$message" + fi +} + +assert_exit_code() { + local actual="$1" + local expected="$2" + local message="$3" + + if [ "$actual" -eq "$expected" ]; then + pass "$message" + else + fail "$message" + echo " expected exit: $expected" + echo " actual exit: $actual" + fi +} + +make_fake_codex() { + local bin_dir="$1" + local mode="$2" + mkdir -p "$bin_dir" + + cat > "$bin_dir/codex" <<EOF +#!/bin/bash +printf '%s\n' "\$*" >> "\${FAKE_CODEX_INVOCATIONS}" + +case "$mode" in + success) + printf '{"type":"thread.started","thread_id":"fake-thread-1"}\n' + ;; + success-with-files) + printf '{"type":"thread.started","thread_id":"fake-thread-1"}\n' + if [[ "\$*" == *"Initializer Agent"* ]]; then + task_dir="\${FAKE_TASK_DIR:-}" + if [ -z "\$task_dir" ]; then + task_dir=\$(printf '%s' "\$*" | sed -n 's/.*Task Directory: \([^[:space:]]*\).*/\1/p') + fi + mkdir -p "\$task_dir" + cat > 
"\$task_dir/task_list.md" <<'TASK' +# Task List: Demo +## Tasks +- [ ] Task 1: demo +TASK + cat > "\$task_dir/progress.md" <<'PROGRESS' +# Progress Log +PROGRESS + fi + ;; + resume-fails-then-exec-succeeds) + if [ "\${1:-}" = "exec" ] && [ "\${2:-}" = "resume" ]; then + printf '{"type":"thread.started","thread_id":"resume-thread"}\n' + exit 23 + fi + printf '{"type":"thread.started","thread_id":"fresh-thread"}\n' + ;; + log-args) + printf '{"type":"thread.started","thread_id":"log-thread"}\n' + ;; + break-task-state) + printf '{"type":"thread.started","thread_id":"break-thread"}\n' + if [[ "\$*" == *"Executor Agent"* ]]; then + task_dir="\${FAKE_TASK_DIR:-}" + if [ -z "\$task_dir" ]; then + task_dir=\$(printf '%s' "\$*" | sed -n 's/.*Task Directory: \([^[:space:]]*\).*/\1/p') + fi + cat > "\$task_dir/task_list.md" <<'TASK' +# Task List +## Tasks +- [ ] 1. invalid +TASK + fi + ;; + *) + echo "unknown fake codex mode: $mode" >&2 + exit 99 + ;; +esac +EOF + + chmod +x "$bin_dir/codex" +} + +run_case() { + local case_name="$1" + shift + + local case_dir="$TEST_ROOT/$case_name" + mkdir -p "$case_dir/bin" "$case_dir/work" + export FAKE_CODEX_INVOCATIONS="$case_dir/invocations.log" + : > "$FAKE_CODEX_INVOCATIONS" + + ( + export PATH="$case_dir/bin:$PATH" + cd "$case_dir/work" + set +e + bash "$SCRIPT_PATH" "$@" >"$case_dir/stdout.log" 2>"$case_dir/stderr.log" + echo $? 
>"$case_dir/exit_code" + ) +} + +test_non_ascii_task_name() { + local case_name="non-ascii-task-name" + make_fake_codex "$TEST_ROOT/$case_name/bin" "success-with-files" + unset FAKE_TASK_DIR + + run_case "$case_name" "中文任务" --no-auto-continue --max-sessions 1 + + local stderr_text + stderr_text="$(cat "$TEST_ROOT/$case_name/stderr.log")" + assert_contains "$stderr_text" "Non-alphanumeric description detected" "非 ASCII 任务名会给出 warning" + + local generated_dir + generated_dir="$(find "$TEST_ROOT/$case_name/work/.autonomous" -mindepth 1 -maxdepth 1 -type d | head -n 1 || true)" + assert_contains "$generated_dir" "/task-" "非 ASCII 任务名会回退到 task- 时间戳目录" + assert_not_contains "$generated_dir" $'\033' "不会生成带 ANSI 的异常目录" +} + +test_initializer_requires_files() { + local case_name="initializer-requires-files" + make_fake_codex "$TEST_ROOT/$case_name/bin" "success" + export FAKE_TASK_DIR="$TEST_ROOT/$case_name/work/.autonomous/build-api" + + run_case "$case_name" "Build API" --no-auto-continue --max-sessions 1 + + local exit_code + exit_code="$(cat "$TEST_ROOT/$case_name/exit_code")" + local combined_text + combined_text="$(cat "$TEST_ROOT/$case_name/stdout.log" "$TEST_ROOT/$case_name/stderr.log")" + assert_exit_code "$exit_code" 1 "initializer 缺少任务文件时应失败" + assert_contains "$combined_text" "Initializer did not create required files" "initializer 缺少文件时会给出明确错误" +} + +test_resume_fallback() { + local case_name="resume-fallback" + make_fake_codex "$TEST_ROOT/$case_name/bin" "resume-fails-then-exec-succeeds" + mkdir -p "$TEST_ROOT/$case_name/work/.autonomous/existing" + cat > "$TEST_ROOT/$case_name/work/.autonomous/existing/task_list.md" <<'EOF_TASK' +# Task List +## Tasks +- [ ] Task 1: demo +EOF_TASK + cat > "$TEST_ROOT/$case_name/work/.autonomous/existing/progress.md" <<'EOF_PROGRESS' +# Progress Log +EOF_PROGRESS + echo "stale-session" > "$TEST_ROOT/$case_name/work/.autonomous/existing/session.id" + + run_case "$case_name" --task-name existing --continue --resume-last 
--no-auto-continue --max-sessions 1 + + local exit_code + exit_code="$(cat "$TEST_ROOT/$case_name/exit_code")" + local combined_text + combined_text="$(cat "$TEST_ROOT/$case_name/stdout.log" "$TEST_ROOT/$case_name/stderr.log")" + local invocations + invocations="$(cat "$TEST_ROOT/$case_name/invocations.log")" + local invocation_count + invocation_count="$(grep -c '^exec' "$TEST_ROOT/$case_name/invocations.log" || true)" + + assert_exit_code "$exit_code" 0 "resume 失败后会自动回退到新 session" + assert_contains "$combined_text" "Resume failed" "resume 失败会输出告警" + assert_contains "$invocations" "exec resume" "会先尝试 resume" + assert_exit_code "$invocation_count" 2 "resume 失败后会追加一次普通 exec" +} + +test_recent_task_ignores_plain_files() { + local case_name="recent-task-selection" + make_fake_codex "$TEST_ROOT/$case_name/bin" "success-with-files" + mkdir -p "$TEST_ROOT/$case_name/work/.autonomous/good" + cat > "$TEST_ROOT/$case_name/work/.autonomous/good/task_list.md" <<'EOF_TASK' +# Task List +## Tasks +- [ ] Task 1: demo +EOF_TASK + cat > "$TEST_ROOT/$case_name/work/.autonomous/good/progress.md" <<'EOF_PROGRESS' +# Progress Log +EOF_PROGRESS + touch "$TEST_ROOT/$case_name/work/.autonomous/zzz-file" + export FAKE_TASK_DIR="$TEST_ROOT/$case_name/work/.autonomous/good" + + run_case "$case_name" --continue --no-auto-continue --max-sessions 1 + + local stdout_text + stdout_text="$(cat "$TEST_ROOT/$case_name/stdout.log")" + assert_contains "$stdout_text" "Continuing most recent task: good" "继续最近任务时只选择有效目录" +} + +test_executor_prompt_omits_full_state() { + local case_name="prompt-omits-full-state" + make_fake_codex "$TEST_ROOT/$case_name/bin" "log-args" + mkdir -p "$TEST_ROOT/$case_name/work/.autonomous/existing" + cat > "$TEST_ROOT/$case_name/work/.autonomous/existing/task_list.md" <<'EOF_TASK' +# Task List +## Tasks +- [ ] Task 1: sentinel-task-list +EOF_TASK + cat > "$TEST_ROOT/$case_name/work/.autonomous/existing/progress.md" <<'EOF_PROGRESS' +# Progress Log +sentinel-progress-notes 
+EOF_PROGRESS + + run_case "$case_name" --task-name existing --continue --no-auto-continue --max-sessions 1 + + local invocations + invocations="$(cat "$TEST_ROOT/$case_name/invocations.log")" + assert_not_contains "$invocations" "sentinel-task-list" "executor prompt 不再内联完整 task_list" + assert_not_contains "$invocations" "sentinel-progress-notes" "executor prompt 不再内联完整 progress" +} + +test_git_root_autonomous_dir() { + local case_name="git-root-autonomous-dir" + make_fake_codex "$TEST_ROOT/$case_name/bin" "success-with-files" + mkdir -p "$TEST_ROOT/$case_name/work/repo/subdir" + ( + cd "$TEST_ROOT/$case_name/work/repo" + git init >/dev/null 2>&1 + ) + export FAKE_TASK_DIR="$TEST_ROOT/$case_name/work/repo/.autonomous/build-api" + + ( + export PATH="$TEST_ROOT/$case_name/bin:$PATH" + cd "$TEST_ROOT/$case_name/work/repo/subdir" + set +e + bash "$SCRIPT_PATH" "Build API" --no-auto-continue --max-sessions 1 >"$TEST_ROOT/$case_name/stdout.log" 2>"$TEST_ROOT/$case_name/stderr.log" + echo $? >"$TEST_ROOT/$case_name/exit_code" + ) + + assert_file_exists "$TEST_ROOT/$case_name/work/repo/.autonomous/build-api" "git 仓库内会把 .autonomous 写到 repo root" + assert_file_missing "$TEST_ROOT/$case_name/work/repo/subdir/.autonomous/build-api" "git 子目录下不会误写本地 .autonomous" +} + +test_progress_output_is_clean() { + local case_name="progress-output-clean" + make_fake_codex "$TEST_ROOT/$case_name/bin" "log-args" + mkdir -p "$TEST_ROOT/$case_name/work/.autonomous/existing" + cat > "$TEST_ROOT/$case_name/work/.autonomous/existing/task_list.md" <<'EOF_TASK' +# Task List +## Tasks +- [ ] Task 1: demo +EOF_TASK + cat > "$TEST_ROOT/$case_name/work/.autonomous/existing/progress.md" <<'EOF_PROGRESS' +# Progress Log +EOF_PROGRESS + + run_case "$case_name" --task-name existing --continue --no-auto-continue --max-sessions 1 + + local stdout_text + stdout_text="$(cat "$TEST_ROOT/$case_name/stdout.log")" + assert_contains "$stdout_text" "Progress: 0/1" "0 命中任务时进度输出保持 0/1" +} + 
+test_invalid_task_name_rejected() { + local case_name="invalid-task-name" + + run_case "$case_name" --task-name . --no-auto-continue --max-sessions 1 + + local exit_code + exit_code="$(cat "$TEST_ROOT/$case_name/exit_code")" + local combined_text + combined_text="$(cat "$TEST_ROOT/$case_name/stdout.log" "$TEST_ROOT/$case_name/stderr.log")" + assert_exit_code "$exit_code" 1 "任务名 dot 会被拒绝" + assert_contains "$combined_text" "Task name cannot start with a dot" "非法任务名会输出明确错误" + + run_case "${case_name}-hidden" --task-name .hidden --no-auto-continue --max-sessions 1 + + exit_code="$(cat "$TEST_ROOT/${case_name}-hidden/exit_code")" + combined_text="$(cat "$TEST_ROOT/${case_name}-hidden/stdout.log" "$TEST_ROOT/${case_name}-hidden/stderr.log")" + assert_exit_code "$exit_code" 1 "点前缀任务名会被拒绝" + assert_contains "$combined_text" "Task name cannot start with a dot" "点前缀任务名也会输出明确错误" +} + +test_stale_lock_is_recovered() { + local case_name="stale-lock-recovery" + make_fake_codex "$TEST_ROOT/$case_name/bin" "success-with-files" + mkdir -p "$TEST_ROOT/$case_name/work/.autonomous/demo/.runner.lock" + printf '999999\n' > "$TEST_ROOT/$case_name/work/.autonomous/demo/.runner.lock/pid" + export FAKE_TASK_DIR="$TEST_ROOT/$case_name/work/.autonomous/demo" + + run_case "$case_name" --task-name demo "Demo task" --no-auto-continue --max-sessions 1 + + local exit_code + exit_code="$(cat "$TEST_ROOT/$case_name/exit_code")" + local combined_text + combined_text="$(cat "$TEST_ROOT/$case_name/stdout.log" "$TEST_ROOT/$case_name/stderr.log")" + assert_exit_code "$exit_code" 0 "stale lock 会被自动清理并继续执行" + assert_contains "$combined_text" "stale lock" "清理 stale lock 时会输出提示" + assert_file_exists "$TEST_ROOT/$case_name/work/.autonomous/demo/task_list.md" "恢复 stale lock 后会继续初始化任务" +} + +test_generated_task_name_collision_creates_new_task() { + local case_name="generated-task-name-collision" + local existing_name="aaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" + local unique_name="aaaaaaaaaaaaaaaaaaaaaaaaaaaa-2" + 
make_fake_codex "$TEST_ROOT/$case_name/bin" "success-with-files" + mkdir -p "$TEST_ROOT/$case_name/work/.autonomous/$existing_name" + cat > "$TEST_ROOT/$case_name/work/.autonomous/$existing_name/task_list.md" <<'EOF_TASK' +# Task List +## Tasks +- [ ] Task 1: existing +EOF_TASK + cat > "$TEST_ROOT/$case_name/work/.autonomous/$existing_name/progress.md" <<'EOF_PROGRESS' +# Progress Log +EOF_PROGRESS + export FAKE_TASK_DIR="$TEST_ROOT/$case_name/work/.autonomous/$unique_name" + + run_case "$case_name" "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa" --no-auto-continue --max-sessions 1 + + local exit_code + exit_code="$(cat "$TEST_ROOT/$case_name/exit_code")" + local stdout_text + stdout_text="$(cat "$TEST_ROOT/$case_name/stdout.log")" + local dir_count + dir_count="$(find "$TEST_ROOT/$case_name/work/.autonomous" -mindepth 2 -maxdepth 2 -name task_list.md | wc -l | tr -d ' ')" + assert_exit_code "$exit_code" 0 "任务名碰撞后新任务初始化仍然成功" + assert_contains "$stdout_text" "INITIALIZER SESSION" "任务名碰撞时会创建新任务而不是误续跑旧任务" + assert_exit_code "$dir_count" 2 "任务名碰撞后会保留两个独立任务目录" +} + +test_incomplete_task_state_fails_fast() { + local case_name="incomplete-task-state" + make_fake_codex "$TEST_ROOT/$case_name/bin" "log-args" + mkdir -p "$TEST_ROOT/$case_name/work/.autonomous/existing" + cat > "$TEST_ROOT/$case_name/work/.autonomous/existing/task_list.md" <<'EOF_TASK' +# Task List +## Tasks +- [ ] Task 1: demo +EOF_TASK + + run_case "$case_name" --task-name existing --continue --no-auto-continue --max-sessions 1 + + local exit_code + exit_code="$(cat "$TEST_ROOT/$case_name/exit_code")" + local combined_text + combined_text="$(cat "$TEST_ROOT/$case_name/stdout.log" "$TEST_ROOT/$case_name/stderr.log")" + assert_exit_code "$exit_code" 1 "缺失 progress.md 时会 fail fast" + assert_contains "$combined_text" "is missing" "缺失状态文件时会输出明确错误" +} + +test_executor_invalid_state_after_run_fails() { + local case_name="executor-invalid-state-after-run" + make_fake_codex "$TEST_ROOT/$case_name/bin" "break-task-state" + mkdir -p 
"$TEST_ROOT/$case_name/work/.autonomous/existing" + export FAKE_TASK_DIR="$TEST_ROOT/$case_name/work/.autonomous/existing" + cat > "$TEST_ROOT/$case_name/work/.autonomous/existing/task_list.md" <<'EOF_TASK' +# Task List +## Tasks +- [ ] Task 1: demo +EOF_TASK + cat > "$TEST_ROOT/$case_name/work/.autonomous/existing/progress.md" <<'EOF_PROGRESS' +# Progress Log +EOF_PROGRESS + + run_case "$case_name" --task-name existing --continue --no-auto-continue --max-sessions 1 + + local exit_code + exit_code="$(cat "$TEST_ROOT/$case_name/exit_code")" + local combined_text + combined_text="$(cat "$TEST_ROOT/$case_name/stdout.log" "$TEST_ROOT/$case_name/stderr.log")" + assert_exit_code "$exit_code" 1 "executor 破坏状态文件后会 fail fast" + assert_contains "$combined_text" "invalid task_list.md" "状态损坏时会输出明确错误" +} + +test_mixed_valid_and_invalid_task_entries_fail() { + local case_name="mixed-valid-invalid-task-entries" + make_fake_codex "$TEST_ROOT/$case_name/bin" "log-args" + mkdir -p "$TEST_ROOT/$case_name/work/.autonomous/existing" + cat > "$TEST_ROOT/$case_name/work/.autonomous/existing/task_list.md" <<'EOF_TASK' +# Task List +## Tasks +- [ ] Task 1: valid +- [ ] 2. 
invalid +EOF_TASK + cat > "$TEST_ROOT/$case_name/work/.autonomous/existing/progress.md" <<'EOF_PROGRESS' +# Progress Log +EOF_PROGRESS + + run_case "$case_name" --task-name existing --continue --no-auto-continue --max-sessions 1 + + local exit_code + exit_code="$(cat "$TEST_ROOT/$case_name/exit_code")" + local combined_text + combined_text="$(cat "$TEST_ROOT/$case_name/stdout.log" "$TEST_ROOT/$case_name/stderr.log")" + assert_exit_code "$exit_code" 1 "混合合法和非法任务行时会 fail fast" + assert_contains "$combined_text" "invalid task_list.md" "混合非法任务行时会输出明确错误" +} + +test_non_ascii_task_name +test_initializer_requires_files +test_resume_fallback +test_recent_task_ignores_plain_files +test_executor_prompt_omits_full_state +test_git_root_autonomous_dir +test_progress_output_is_clean +test_invalid_task_name_rejected +test_stale_lock_is_recovered +test_generated_task_name_collision_creates_new_task +test_incomplete_task_state_fails_fast +test_executor_invalid_state_after_run_fails +test_mixed_valid_and_invalid_task_entries_fail + +echo "Passed: $PASS_COUNT" +echo "Failed: $FAIL_COUNT" + +if [ "$FAIL_COUNT" -ne 0 ]; then + exit 1 +fi