Skip to content

Commit aa0190f

Browse files
committed
update run.py: simplify args, and integrate new config structure
1 parent 394e8ce commit aa0190f

File tree

1 file changed

+56
-61
lines changed

1 file changed

+56
-61
lines changed

omni-reader/run.py

+56 -61
Original file line number | Diff line number | Diff line change
@@ -28,13 +28,18 @@
2828
from dotenv import load_dotenv
2929
from PIL import Image
3030

31-
from pipelines.batch_pipeline import run_ocr_batch_pipeline
31+
from pipelines.batch_pipeline import run_batch_ocr_pipeline
3232
from pipelines.evaluation_pipeline import run_ocr_evaluation_pipeline
3333
from utils.config import (
3434
get_image_paths,
3535
list_available_ground_truth_files,
3636
load_config,
37+
override_batch_config,
38+
override_evaluation_config,
3739
print_config_summary,
40+
select_config_path,
41+
validate_batch_config,
42+
validate_evaluation_config,
3843
)
3944
from utils.model_configs import DEFAULT_MODEL, MODEL_CONFIGS
4045
from utils.ocr_processing import run_ocr
@@ -265,7 +270,7 @@ def run_ui_mode(args, parser):
265270

266271
def run_pipeline_mode(args, parser):
267272
"""Run the application in full pipeline mode with ZenML tracking."""
268-
# List available ground truth files
273+
# List available ground truth files if requested
269274
if args.list_ground_truth_files:
270275
gt_files = list_available_ground_truth_files(directory=args.ground_truth_dir)
271276
if gt_files:
@@ -276,65 +281,57 @@ def run_pipeline_mode(args, parser):
276281
print(f"No ground truth files found in '{args.ground_truth_dir}'")
277282
return
278283

279-
# Load configuration
284+
# Determine pipeline mode and select config path
285+
evaluation_mode = args.eval
286+
280287
if args.config:
281-
config = load_config(args.config)
288+
config_path = args.config
282289
else:
283-
parser.error("Please provide a configuration file with --config")
290+
config_path = select_config_path(evaluation_mode)
291+
print(f"Auto-selecting config file: {config_path}")
292+
293+
if not os.path.exists(config_path):
294+
parser.error(f"Config file not found: {config_path}")
284295
return
285296

286-
# Override config with CLI arguments if provided
287-
if args.image_paths or args.image_folder or args.custom_prompt:
288-
# Create parameters section if it doesn't exist
289-
if "parameters" not in config:
290-
config["parameters"] = {}
291-
292-
# Update parameters with CLI arguments
293-
if args.image_paths:
294-
config["parameters"]["input_image_paths"] = args.image_paths
295-
if args.image_folder:
296-
config["parameters"]["input_image_folder"] = args.image_folder
297-
if args.custom_prompt:
298-
# Create steps section if needed
299-
if "steps" not in config:
300-
config["steps"] = {}
301-
if "ocr_processor" not in config["steps"]:
302-
config["steps"]["ocr_processor"] = {"parameters": {}}
303-
304-
# Set custom prompt
305-
config["steps"]["ocr_processor"]["parameters"]["custom_prompt"] = args.custom_prompt
306-
307-
print_config_summary(config)
308-
309-
# Create output directories if needed
310-
if (
311-
"steps" in config
312-
and "save_ocr_results" in config["steps"]
313-
and config["steps"]["save_ocr_results"].get("parameters", {}).get("save_results", False)
314-
):
315-
results_dir = config["steps"]["save_ocr_results"]["parameters"].get(
316-
"results_directory", "ocr_results"
317-
)
318-
os.makedirs(results_dir, exist_ok=True)
319-
320-
if (
321-
"steps" in config
322-
and "save_visualization" in config["steps"]
323-
and config["steps"]["save_visualization"].get("parameters", {}).get("save_locally", False)
324-
):
325-
viz_dir = config["steps"]["save_visualization"]["parameters"].get(
326-
"visualization_directory", "visualizations"
327-
)
328-
os.makedirs(viz_dir, exist_ok=True)
297+
# Load the configuration
298+
try:
299+
config = load_config(config_path)
300+
except (ValueError, FileNotFoundError) as e:
301+
parser.error(f"Error loading configuration: {str(e)}")
302+
return
303+
304+
cli_args = {
305+
"image_paths": args.image_paths,
306+
"image_folder": args.image_folder,
307+
"custom_prompt": args.custom_prompt,
308+
"ground_truth_dir": args.ground_truth_dir,
309+
}
310+
311+
# Override configuration with CLI arguments if provided
312+
try:
313+
if evaluation_mode:
314+
config = override_evaluation_config(config, cli_args)
315+
validate_evaluation_config(config)
316+
else:
317+
config = override_batch_config(config, cli_args)
318+
validate_batch_config(config)
319+
except ValueError as e:
320+
parser.error(f"Configuration error: {str(e)}")
321+
return
329322

330-
# Run pipeline in specified mode
331-
mode = config.get("parameters", {}).get("mode", "evaluation")
332-
if mode == "batch":
333-
print("Running OCR Batch Pipeline...")
334-
run_ocr_batch_pipeline(config)
335-
else: # Default to evaluation mode
336-
print("Running OCR Evaluation Pipeline...")
337-
run_ocr_evaluation_pipeline(config)
323+
print_config_summary(config, is_evaluation_config=evaluation_mode)
324+
325+
try:
326+
if evaluation_mode:
327+
print("Running OCR Evaluation Pipeline...")
328+
run_ocr_evaluation_pipeline(config)
329+
else:
330+
print("Running OCR Batch Pipeline...")
331+
run_batch_ocr_pipeline(config)
332+
except Exception as e:
333+
print(f"Error running pipeline: {str(e)}")
334+
return
338335

339336

340337
def main():
@@ -355,14 +352,12 @@ def main():
355352
config_group.add_argument(
356353
"--config",
357354
type=str,
358-
default="configs/config.yaml",
359355
help="Path to YAML configuration file (for pipeline mode)",
360356
)
361357
config_group.add_argument(
362-
"--create-default-config",
363-
type=str,
364-
metavar="PATH",
365-
help="Create a default configuration file at the specified path and exit",
358+
"--eval",
359+
action="store_true",
360+
help="Run in evaluation pipeline mode (defaults to batch pipeline if not specified)",
366361
)
367362

368363
# Ground truth utilities (pipeline mode)

0 commit comments

Comments
 (0)