28
28
from dotenv import load_dotenv
29
29
from PIL import Image
30
30
31
- from pipelines .batch_pipeline import run_ocr_batch_pipeline
31
+ from pipelines .batch_pipeline import run_batch_ocr_pipeline
32
32
from pipelines .evaluation_pipeline import run_ocr_evaluation_pipeline
33
33
from utils .config import (
34
34
get_image_paths ,
35
35
list_available_ground_truth_files ,
36
36
load_config ,
37
+ override_batch_config ,
38
+ override_evaluation_config ,
37
39
print_config_summary ,
40
+ select_config_path ,
41
+ validate_batch_config ,
42
+ validate_evaluation_config ,
38
43
)
39
44
from utils .model_configs import DEFAULT_MODEL , MODEL_CONFIGS
40
45
from utils .ocr_processing import run_ocr
@@ -265,7 +270,7 @@ def run_ui_mode(args, parser):
265
270
266
271
def run_pipeline_mode (args , parser ):
267
272
"""Run the application in full pipeline mode with ZenML tracking."""
268
- # List available ground truth files
273
+ # List available ground truth files if requested
269
274
if args .list_ground_truth_files :
270
275
gt_files = list_available_ground_truth_files (directory = args .ground_truth_dir )
271
276
if gt_files :
@@ -276,65 +281,57 @@ def run_pipeline_mode(args, parser):
276
281
print (f"No ground truth files found in '{ args .ground_truth_dir } '" )
277
282
return
278
283
279
- # Load configuration
284
+ # Determine pipeline mode and select config path
285
+ evaluation_mode = args .eval
286
+
280
287
if args .config :
281
- config = load_config ( args .config )
288
+ config_path = args .config
282
289
else :
283
- parser .error ("Please provide a configuration file with --config" )
290
+ config_path = select_config_path (evaluation_mode )
291
+ print (f"Auto-selecting config file: { config_path } " )
292
+
293
+ if not os .path .exists (config_path ):
294
+ parser .error (f"Config file not found: { config_path } " )
284
295
return
285
296
286
- # Override config with CLI arguments if provided
287
- if args .image_paths or args .image_folder or args .custom_prompt :
288
- # Create parameters section if it doesn't exist
289
- if "parameters" not in config :
290
- config ["parameters" ] = {}
291
-
292
- # Update parameters with CLI arguments
293
- if args .image_paths :
294
- config ["parameters" ]["input_image_paths" ] = args .image_paths
295
- if args .image_folder :
296
- config ["parameters" ]["input_image_folder" ] = args .image_folder
297
- if args .custom_prompt :
298
- # Create steps section if needed
299
- if "steps" not in config :
300
- config ["steps" ] = {}
301
- if "ocr_processor" not in config ["steps" ]:
302
- config ["steps" ]["ocr_processor" ] = {"parameters" : {}}
303
-
304
- # Set custom prompt
305
- config ["steps" ]["ocr_processor" ]["parameters" ]["custom_prompt" ] = args .custom_prompt
306
-
307
- print_config_summary (config )
308
-
309
- # Create output directories if needed
310
- if (
311
- "steps" in config
312
- and "save_ocr_results" in config ["steps" ]
313
- and config ["steps" ]["save_ocr_results" ].get ("parameters" , {}).get ("save_results" , False )
314
- ):
315
- results_dir = config ["steps" ]["save_ocr_results" ]["parameters" ].get (
316
- "results_directory" , "ocr_results"
317
- )
318
- os .makedirs (results_dir , exist_ok = True )
319
-
320
- if (
321
- "steps" in config
322
- and "save_visualization" in config ["steps" ]
323
- and config ["steps" ]["save_visualization" ].get ("parameters" , {}).get ("save_locally" , False )
324
- ):
325
- viz_dir = config ["steps" ]["save_visualization" ]["parameters" ].get (
326
- "visualization_directory" , "visualizations"
327
- )
328
- os .makedirs (viz_dir , exist_ok = True )
297
+ # Load the configuration
298
+ try :
299
+ config = load_config (config_path )
300
+ except (ValueError , FileNotFoundError ) as e :
301
+ parser .error (f"Error loading configuration: { str (e )} " )
302
+ return
303
+
304
+ cli_args = {
305
+ "image_paths" : args .image_paths ,
306
+ "image_folder" : args .image_folder ,
307
+ "custom_prompt" : args .custom_prompt ,
308
+ "ground_truth_dir" : args .ground_truth_dir ,
309
+ }
310
+
311
+ # Override configuration with CLI arguments if provided
312
+ try :
313
+ if evaluation_mode :
314
+ config = override_evaluation_config (config , cli_args )
315
+ validate_evaluation_config (config )
316
+ else :
317
+ config = override_batch_config (config , cli_args )
318
+ validate_batch_config (config )
319
+ except ValueError as e :
320
+ parser .error (f"Configuration error: { str (e )} " )
321
+ return
329
322
330
- # Run pipeline in specified mode
331
- mode = config .get ("parameters" , {}).get ("mode" , "evaluation" )
332
- if mode == "batch" :
333
- print ("Running OCR Batch Pipeline..." )
334
- run_ocr_batch_pipeline (config )
335
- else : # Default to evaluation mode
336
- print ("Running OCR Evaluation Pipeline..." )
337
- run_ocr_evaluation_pipeline (config )
323
+ print_config_summary (config , is_evaluation_config = evaluation_mode )
324
+
325
+ try :
326
+ if evaluation_mode :
327
+ print ("Running OCR Evaluation Pipeline..." )
328
+ run_ocr_evaluation_pipeline (config )
329
+ else :
330
+ print ("Running OCR Batch Pipeline..." )
331
+ run_batch_ocr_pipeline (config )
332
+ except Exception as e :
333
+ print (f"Error running pipeline: { str (e )} " )
334
+ return
338
335
339
336
340
337
def main ():
@@ -355,14 +352,12 @@ def main():
355
352
config_group .add_argument (
356
353
"--config" ,
357
354
type = str ,
358
- default = "configs/config.yaml" ,
359
355
help = "Path to YAML configuration file (for pipeline mode)" ,
360
356
)
361
357
config_group .add_argument (
362
- "--create-default-config" ,
363
- type = str ,
364
- metavar = "PATH" ,
365
- help = "Create a default configuration file at the specified path and exit" ,
358
+ "--eval" ,
359
+ action = "store_true" ,
360
+ help = "Run in evaluation pipeline mode (defaults to batch pipeline if not specified)" ,
366
361
)
367
362
368
363
# Ground truth utilities (pipeline mode)
0 commit comments