#!/usr/bin/env python3
"""
Enhanced CLI Documentation Validation Engine
Comprehensive validation engine that integrates existing CLI documentation tools
with advanced AST parsing, multi-format docstring support, and CI/CD integration.
This engine serves as the unified entry point for all CLI documentation validation,
providing:
- Integration with existing validation tools
- Advanced AST-based analysis
- Multi-format docstring parsing
- Performance optimization with caching
- CI/CD integration with proper exit codes and reporting
Usage:
python scripts/cli_enhanced_validation.py [--mode=comprehensive] [--format=console] [--verbose]
Exit codes:
0: All validation passed
1: Validation failures found
2: Script execution error
"""
import ast
import sys
import os
import time
import json
import logging
import hashlib
import argparse
import pickle
import threading
import subprocess
from pathlib import Path
from typing import Dict, List, Optional, Any, Union, Set, Callable, TYPE_CHECKING
from dataclasses import dataclass, field
from enum import Enum
from abc import ABC, abstractmethod
from concurrent.futures import ThreadPoolExecutor
# Add project root to path for imports
sys.path.insert(0, str(Path(__file__).parent.parent))
# Import existing validation tools
from scripts.validate_cli_documentation import CLIDocumentationValidator, ValidationResult as ModuleValidationResult
from scripts.cli_documentation_style_guide import CLIDocstringValidator, CLIDocumentationStandards
# Import docstring_parser for structured parsing
try:
    from docstring_parser import parse as parse_docstring
    DOCSTRING_PARSER_AVAILABLE = True
except ImportError:
    DOCSTRING_PARSER_AVAILABLE = False
    parse_docstring = None
# Optional imports for configuration management
if TYPE_CHECKING:
    # Type checking imports - these are for IDE/linter support only
    from pydantic_settings import BaseSettings, SettingsConfigDict
    from pydantic import Field
    import tomli
    import tomllib
# Runtime imports with graceful fallback
PYDANTIC_SETTINGS_AVAILABLE = False
try:
    from pydantic import BaseSettings, Field
    from pydantic.v1.config import Extra
    PYDANTIC_SETTINGS_AVAILABLE = True
except ImportError:
    try:
        # Try newer pydantic-settings package
        from pydantic_settings import BaseSettings, SettingsConfigDict  # type: ignore
        from pydantic import Field  # type: ignore
        PYDANTIC_SETTINGS_AVAILABLE = True
    except ImportError:
        # No pydantic support available
        PYDANTIC_SETTINGS_AVAILABLE = False
# TOML support detection
TOML_SUPPORT = False
try:
    # Python 3.11+ built-in support
    import tomllib  # type: ignore
    TOML_SUPPORT = True
    TOML_LOADER = tomllib
except ImportError:
    try:
        # Python < 3.11 fallback
        import tomli as TOML_LOADER  # type: ignore
        TOML_SUPPORT = True
    except ImportError:
        TOML_SUPPORT = False
        TOML_LOADER = None


# Validation modes
class ValidationMode(Enum):
    QUICK = "quick"
    COMPREHENSIVE = "comprehensive"
    STRICT = "strict"


class OutputFormat(Enum):
    CONSOLE = "console"
    JSON = "json"
    GITHUB = "github"


@dataclass
class CodeDefinition:
    """Represents a code definition (function, method, class) found in source code."""
    name: str
    type: str  # 'function', 'method', 'class'
    file_path: str
    line_number: int
    docstring: Optional[str]
    parent_class: Optional[str] = None
    args: List[str] = field(default_factory=list)
    is_cli_handler: bool = False


@dataclass
class CoverageReport:
    """Docstring coverage analysis report."""
    total_definitions: int
    documented_definitions: int
    undocumented_definitions: int
    coverage_percentage: float
    undocumented_items: List[CodeDefinition] = field(default_factory=list)


@dataclass
class ValidationLocation:
    """Represents the location of a validation issue."""
    file_path: str
    line_number: int
    column_number: Optional[int] = None


@dataclass
class ValidationIssue:
    """Represents a validation issue found during analysis (updated structure)."""
    type: str  # Issue type (e.g., 'missing_docstring', 'parameter_mismatch')
    severity: str  # 'error', 'warning', 'info'
    message: str
    location: ValidationLocation
    suggestions: List[str] = field(default_factory=list)


@dataclass
class FileValidationResult:
    """Results of validating a single file."""
    file_path: str
    definitions: List[CodeDefinition]
    issues: List[ValidationIssue]
    total_definitions: int
    documented_count: int
    coverage_percentage: float
    execution_time: float
    metadata: Dict[str, Any] = field(default_factory=dict)


@dataclass
class ValidationReport:
    """Comprehensive validation report."""
    total_files: int
    total_definitions: int
    coverage_percentage: float
    execution_time: float
    issues: List[ValidationIssue]
    metadata: Dict[str, Any] = field(default_factory=dict)


@dataclass
class Parameter:
    """Represents a function parameter from docstring or signature."""
    name: str
    type_hint: Optional[str] = None
    description: Optional[str] = None
    is_optional: bool = False
    default_value: Optional[str] = None


@dataclass
class ReturnInfo:
    """Represents return value information from docstring."""
    type_hint: Optional[str] = None
    description: Optional[str] = None


@dataclass
class ExceptionInfo:
    """Represents exception information from docstring."""
    type_name: str
    description: Optional[str] = None


@dataclass
class ParsedDocstring:
    """Represents a parsed docstring with structured components."""
    raw_text: str
    detected_format: str
    summary: str
    description: str
    parameters: List[Parameter] = field(default_factory=list)
    returns: Optional[ReturnInfo] = None
    raises: List[ExceptionInfo] = field(default_factory=list)
    examples: List[str] = field(default_factory=list)
    parsing_successful: bool = True
    parsing_errors: List[str] = field(default_factory=list)


@dataclass
class FunctionSignature:
    """Represents a function signature extracted from AST."""
    parameters: List[Parameter]
    return_type: Optional[str] = None


@dataclass
class StructureValidation:
    """Results of structured docstring validation."""
    is_valid: bool
    missing_parameters: List[str] = field(default_factory=list)
    extra_parameters: List[str] = field(default_factory=list)
    missing_return_doc: bool = False
    undocumented_exceptions: List[str] = field(default_factory=list)
    type_mismatches: List[str] = field(default_factory=list)
    format_issues: List[str] = field(default_factory=list)
    suggestions: List[str] = field(default_factory=list)
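

# Example of the data model above (a minimal sketch, not part of the engine's
# API surface; the file path, message, and suggestion are illustrative only):
#
#     issue = ValidationIssue(
#         type="missing_docstring",
#         severity="error",
#         message="Function 'run_command' has no docstring",
#         location=ValidationLocation(file_path="scripts/example.py", line_number=42),
#         suggestions=["Add a Google-style docstring with Args/Returns sections"],
#     )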


class CacheManager:
    """
    File-based caching system for validation results with content-hash invalidation.

    Provides efficient caching of AST parsing results and validation outcomes
    with automatic cache invalidation when file content changes.
    """

    def __init__(self, cache_dir: str = ".validation-cache", ttl_hours: int = 24, max_size_mb: int = 100):
        """Initialize a cache manager with specified directory, time-to-live, and size constraints.

        Parameters:
            - cache_dir (str): Directory where cached data will be stored. Defaults to ".validation-cache".
            - ttl_hours (int): Time-to-live for cached items in hours. Defaults to 24 hours.
            - max_size_mb (int): Maximum cache size in megabytes. Defaults to 100 MB.

        Returns:
            - None: This method initializes the cache manager; there is no return value.

        Processing Logic:
            - Converts ttl_hours to seconds for internal use.
            - Ensures that the cache directory exists, creating it if necessary.
            - Initializes cache hit/miss statistics counters.
        """
        self.cache_dir = Path(cache_dir)
        self.ttl_seconds = ttl_hours * 3600
        self.max_size_bytes = max_size_mb * 1024 * 1024
        self.lock = threading.Lock()
        self.logger = logging.getLogger(__name__ + '.CacheManager')
        # Ensure cache directory exists
        self.cache_dir.mkdir(exist_ok=True)
        # Cache hit/miss statistics
        self.hits = 0
        self.misses = 0
        self.logger.debug(f"CacheManager initialized: dir={cache_dir}, ttl={ttl_hours}h, max_size={max_size_mb}MB")

    def _get_file_hash(self, file_path: str) -> str:
        """Generate content hash for a file."""
        try:
            with open(file_path, 'rb') as f:
                content = f.read()
            return hashlib.sha256(content).hexdigest()[:16]  # Use first 16 chars for performance
        except Exception as e:
            self.logger.debug(f"Failed to hash {file_path}: {e}")
            return "invalid"

    def _get_cache_key(self, file_path: str, validation_type: str = "ast") -> str:
        """Generate cache key for a file and validation type."""
        file_hash = self._get_file_hash(file_path)
        path_normalized = str(Path(file_path).resolve())
        key_content = f"{path_normalized}:{validation_type}:{file_hash}"
        return hashlib.md5(key_content.encode()).hexdigest()

    def _get_cache_file_path(self, cache_key: str) -> Path:
        """Get the cache file path for a given cache key."""
        return self.cache_dir / f"{cache_key}.cache"

    def get(self, file_path: str, validation_type: str = "ast") -> Optional[Any]:
        """
        Retrieve cached validation result for a file.

        Args:
            file_path: Path to the file being validated
            validation_type: Type of validation (ast, module, style)

        Returns:
            Cached result if valid, None if cache miss
        """
        with self.lock:
            try:
                cache_key = self._get_cache_key(file_path, validation_type)
                cache_file = self._get_cache_file_path(cache_key)
                if not cache_file.exists():
                    self.misses += 1
                    return None
                # Check if cache entry has expired
                cache_age = time.time() - cache_file.stat().st_mtime
                if cache_age > self.ttl_seconds:
                    cache_file.unlink()  # Remove expired cache
                    self.misses += 1
                    return None
                # Load cached result
                with open(cache_file, 'rb') as f:
                    cached_result = pickle.load(f)
                self.hits += 1
                self.logger.debug(f"Cache HIT: {file_path} ({validation_type})")
                return cached_result
            except Exception as e:
                self.logger.debug(f"Cache retrieval failed for {file_path}: {e}")
                self.misses += 1
                return None

    def set(self, file_path: str, result: Any, validation_type: str = "ast") -> bool:
        """
        Store validation result in cache.

        Args:
            file_path: Path to the file being validated
            result: Validation result to cache
            validation_type: Type of validation (ast, module, style)

        Returns:
            True if successfully cached, False otherwise
        """
        with self.lock:
            try:
                cache_key = self._get_cache_key(file_path, validation_type)
                cache_file = self._get_cache_file_path(cache_key)
                # Store result in cache
                with open(cache_file, 'wb') as f:
                    pickle.dump(result, f)
                self.logger.debug(f"Cache SET: {file_path} ({validation_type})")
                # Trigger cleanup if cache is getting large
                self._cleanup_if_needed()
                return True
            except Exception as e:
                self.logger.error(f"Failed to cache result for {file_path}: {e}")
                return False

    def invalidate(self, file_path: Optional[str] = None) -> int:
        """
        Invalidate cache entries.

        Args:
            file_path: Specific file to invalidate, or None to clear all

        Returns:
            Number of cache entries removed
        """
        with self.lock:
            removed_count = 0
            if file_path is None:
                # Clear entire cache
                for cache_file in self.cache_dir.glob("*.cache"):
                    cache_file.unlink()
                    removed_count += 1
                self.logger.info(f"Cleared entire cache: {removed_count} entries")
            else:
                # Invalidate specific file (all validation types)
                for validation_type in ["ast", "module", "style", "structured"]:
                    cache_key = self._get_cache_key(file_path, validation_type)
                    cache_file = self._get_cache_file_path(cache_key)
                    if cache_file.exists():
                        cache_file.unlink()
                        removed_count += 1
                self.logger.debug(f"Invalidated cache for {file_path}: {removed_count} entries")
            return removed_count

    def _cleanup_if_needed(self) -> None:
        """Clean up cache if it exceeds size limits."""
        try:
            total_size = sum(f.stat().st_size for f in self.cache_dir.glob("*.cache"))
            if total_size <= self.max_size_bytes:
                return
            # Remove oldest cache files until under size limit
            cache_files = list(self.cache_dir.glob("*.cache"))
            cache_files.sort(key=lambda f: f.stat().st_mtime)  # Oldest first
            removed_count = 0
            for cache_file in cache_files:
                # Record the size before unlinking; stat() would fail afterwards
                file_size = cache_file.stat().st_size
                cache_file.unlink()
                removed_count += 1
                total_size -= file_size
                if total_size <= self.max_size_bytes * 0.8:  # Remove an extra 20% as buffer
                    break
            self.logger.info(f"Cache cleanup: removed {removed_count} old entries")
        except Exception as e:
            self.logger.error(f"Cache cleanup failed: {e}")

    def get_stats(self) -> Dict[str, Any]:
        """Get cache performance statistics."""
        total_requests = self.hits + self.misses
        hit_rate = (self.hits / total_requests * 100) if total_requests > 0 else 0
        cache_files = list(self.cache_dir.glob("*.cache"))
        total_size = sum(f.stat().st_size for f in cache_files)
        return {
            "hits": self.hits,
            "misses": self.misses,
            "hit_rate_percent": round(hit_rate, 1),
            "total_entries": len(cache_files),
            "total_size_mb": round(total_size / (1024 * 1024), 2),
            "max_size_mb": round(self.max_size_bytes / (1024 * 1024), 2)
        }
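

# Usage sketch for CacheManager (illustrative; "scripts/example.py" is a
# hypothetical path, and the cached payload may be any picklable object):
#
#     cache = CacheManager(cache_dir=".validation-cache", ttl_hours=24)
#     cached = cache.get("scripts/example.py", validation_type="ast")
#     if cached is None:
#         definitions = ASTAnalyzer().extract_all_definitions("scripts/example.py")
#         cache.set("scripts/example.py", definitions, validation_type="ast")
#     print(cache.get_stats())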


class GitIntegration:
    """
    Git integration for incremental validation and change detection.

    Provides functionality to detect changed files, staged files, and compare
    against different branches for efficient incremental validation.
    """

    def __init__(self):
        self.logger = logging.getLogger(__name__ + '.GitIntegration')
        self._git_available = self._check_git_available()

    def _check_git_available(self) -> bool:
        """Check if git is available and we're in a git repository."""
        try:
            result = subprocess.run(['git', 'rev-parse', '--git-dir'],
                                    capture_output=True, text=True, timeout=5)
            return result.returncode == 0
        except Exception:
            return False

    def get_changed_files(self, base_branch: str = 'main', include_untracked: bool = False) -> List[str]:
        """
        Get list of Python files changed since base branch.

        Args:
            base_branch: Base branch to compare against
            include_untracked: Whether to include untracked files

        Returns:
            List of changed Python file paths
        """
        if not self._git_available:
            self.logger.warning("Git not available, cannot detect changed files")
            return []
        changed_files = []
        try:
            # Get files changed compared to base branch
            result = subprocess.run(['git', 'diff', '--name-only', f'{base_branch}...HEAD'],
                                    capture_output=True, text=True, timeout=10)
            if result.returncode == 0:
                changed_files.extend(result.stdout.strip().split('\n'))
            # Get unstaged changes
            result = subprocess.run(['git', 'diff', '--name-only'],
                                    capture_output=True, text=True, timeout=10)
            if result.returncode == 0:
                changed_files.extend(result.stdout.strip().split('\n'))
            # Get staged changes
            result = subprocess.run(['git', 'diff', '--cached', '--name-only'],
                                    capture_output=True, text=True, timeout=10)
            if result.returncode == 0:
                changed_files.extend(result.stdout.strip().split('\n'))
            # Get untracked files if requested
            if include_untracked:
                result = subprocess.run(['git', 'ls-files', '--others', '--exclude-standard'],
                                        capture_output=True, text=True, timeout=10)
                if result.returncode == 0:
                    changed_files.extend(result.stdout.strip().split('\n'))
            # Filter to Python files and remove duplicates
            python_files = list(set(
                f for f in changed_files
                if f.endswith('.py') and f.strip() and Path(f).exists()
            ))
            self.logger.info(f"Found {len(python_files)} changed Python files")
            return python_files
        except Exception as e:
            self.logger.error(f"Failed to get changed files: {e}")
            return []

    def get_staged_files(self) -> List[str]:
        """
        Get list of staged Python files for pre-commit validation.

        Returns:
            List of staged Python file paths
        """
        if not self._git_available:
            return []
        try:
            result = subprocess.run(['git', 'diff', '--cached', '--name-only', '--diff-filter=ACMR'],
                                    capture_output=True, text=True, timeout=10)
            if result.returncode == 0:
                staged_files = [
                    f for f in result.stdout.strip().split('\n')
                    if f.endswith('.py') and f.strip() and Path(f).exists()
                ]
                self.logger.info(f"Found {len(staged_files)} staged Python files")
                return staged_files
        except Exception as e:
            self.logger.error(f"Failed to get staged files: {e}")
        # Fall through to an empty list when git fails or returns a non-zero status
        return []

    def is_file_ignored(self, file_path: str) -> bool:
        """
        Check if a file is ignored by git.

        Args:
            file_path: Path to check

        Returns:
            True if file is ignored by git
        """
        if not self._git_available:
            return False
        try:
            result = subprocess.run(['git', 'check-ignore', file_path],
                                    capture_output=True, text=True, timeout=5)
            return result.returncode == 0
        except Exception:
            return False
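

# Usage sketch for GitIntegration (illustrative; assumes the script runs inside
# a git checkout with a 'main' branch):
#
#     git = GitIntegration()
#     for path in git.get_changed_files(base_branch="main"):
#         if not git.is_file_ignored(path):
#             print(f"would validate: {path}")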


class ValidationConfig:
    """
    Configuration management with multiple source support.

    Supports loading configuration from:
    - .cli-validation.yml
    - pyproject.toml
    - Environment variables
    - Direct initialization
    """

    def __init__(self, config_file: Optional[str] = None):
        """Initializes the ValidationConfig class with default settings and an optional configuration file.

        Parameters:
            - config_file (Optional[str]): The path to a configuration file whose values override the defaults.

        Returns:
            - None: This constructor does not return any value.

        Processing Logic:
            - Initializes default settings for caching, performance, validation rules, and CI/CD.
            - Loads additional configuration settings from the provided file, if specified.
        """
        self.logger = logging.getLogger(__name__ + '.ValidationConfig')
        # Default configuration
        self.cache_enabled = True
        self.cache_ttl_hours = 24
        self.cache_max_size_mb = 100
        self.cache_dir = ".validation-cache"
        # Performance settings
        self.parallel_workers = 4
        self.incremental_mode = True
        self.timeout_seconds = 300
        # Validation rules
        self.coverage_threshold = 95.0
        self.docstring_formats = ['google', 'numpy', 'sphinx']
        self.require_examples = True
        self.validate_parameter_types = True
        self.fail_on_warnings = False
        # CI/CD settings
        self.github_annotations = True
        self.pre_commit_quick_mode = True
        self.staged_files_only = True
        self.json_output = False
        # Load configuration from various sources
        self._load_configuration(config_file)

    def _load_configuration(self, config_file: Optional[str] = None) -> None:
        """Load configuration from multiple sources with priority order."""
        # 1. Try to load from specified config file
        if config_file and Path(config_file).exists():
            self._load_from_yaml(config_file)
            return
        # 2. Try default config file locations
        default_configs = [
            '.cli-validation.yml',
            '.cli-validation.yaml',
            'cli-validation.yml',
            'cli-validation.yaml'
        ]
        for config_path in default_configs:
            if Path(config_path).exists():
                self._load_from_yaml(config_path)
                break
        # 3. Try pyproject.toml
        self._load_from_pyproject()
        # 4. Load environment variables
        self._load_from_env()
        self.logger.debug("Configuration loaded from multiple sources")

    def _load_from_yaml(self, config_path: str) -> None:
        """Load configuration from YAML file."""
        try:
            import yaml
            with open(config_path) as f:
                config_data = yaml.safe_load(f)
            if not config_data:
                return
            # Update configuration from YAML
            validation_config = config_data.get('validation', {})
            performance_config = config_data.get('performance', {})
            rules_config = config_data.get('rules', {})
            ci_cd_config = config_data.get('ci_cd', {})
            # Cache settings
            cache_config = validation_config.get('cache', {})
            if cache_config:
                self.cache_enabled = cache_config.get('enabled', self.cache_enabled)
                self.cache_ttl_hours = cache_config.get('ttl_hours', self.cache_ttl_hours)
                self.cache_max_size_mb = cache_config.get('max_size_mb', self.cache_max_size_mb)
                self.cache_dir = cache_config.get('dir', self.cache_dir)
            # Performance settings
            if performance_config:
                self.parallel_workers = performance_config.get('parallel_workers', self.parallel_workers)
                self.incremental_mode = performance_config.get('incremental', self.incremental_mode)
                self.timeout_seconds = performance_config.get('timeout_seconds', self.timeout_seconds)
            # Validation rules
            if rules_config:
                self.coverage_threshold = rules_config.get('coverage_threshold', self.coverage_threshold)
                self.docstring_formats = rules_config.get('docstring_formats', self.docstring_formats)
                self.require_examples = rules_config.get('require_examples', self.require_examples)
                self.validate_parameter_types = rules_config.get('validate_parameter_types', self.validate_parameter_types)
                self.fail_on_warnings = rules_config.get('fail_on_warnings', self.fail_on_warnings)
            # CI/CD settings
            if ci_cd_config:
                github_config = ci_cd_config.get('github_actions', {})
                if github_config:
                    self.github_annotations = github_config.get('annotations', self.github_annotations)
                precommit_config = ci_cd_config.get('pre_commit', {})
                if precommit_config:
                    self.pre_commit_quick_mode = precommit_config.get('quick_mode', self.pre_commit_quick_mode)
                    self.staged_files_only = precommit_config.get('staged_files_only', self.staged_files_only)
            self.logger.info(f"Configuration loaded from {config_path}")
        except Exception as e:
            self.logger.warning(f"Failed to load YAML config from {config_path}: {e}")

    def _load_from_pyproject(self) -> None:
        """Load configuration from pyproject.toml."""
        if not TOML_SUPPORT:
            self.logger.debug("TOML support not available (tomllib/tomli not found)")
            return
        try:
            pyproject_path = Path('pyproject.toml')
            if not pyproject_path.exists():
                return
            with open(pyproject_path, 'rb') as f:
                pyproject_data = TOML_LOADER.load(f)
            # Look for tool.cli-validation section
            tool_config = pyproject_data.get('tool', {})
            cli_validation_config = tool_config.get('cli-validation', {})
            if cli_validation_config:
                # Apply same logic as YAML loading
                self._apply_config_dict(cli_validation_config)
                self.logger.info("Configuration loaded from pyproject.toml")
        except Exception as e:
            self.logger.warning(f"Failed to load pyproject.toml config: {e}")

    def _load_from_env(self) -> None:
        """Load configuration from environment variables."""
        env_prefix = 'CLI_VALIDATION_'
        env_mappings = {
            'CACHE_ENABLED': ('cache_enabled', bool),
            'CACHE_TTL_HOURS': ('cache_ttl_hours', int),
            'CACHE_MAX_SIZE_MB': ('cache_max_size_mb', int),
            'CACHE_DIR': ('cache_dir', str),
            'PARALLEL_WORKERS': ('parallel_workers', int),
            'INCREMENTAL_MODE': ('incremental_mode', bool),
            'TIMEOUT_SECONDS': ('timeout_seconds', int),
            'COVERAGE_THRESHOLD': ('coverage_threshold', float),
            'REQUIRE_EXAMPLES': ('require_examples', bool),
            'VALIDATE_PARAMETER_TYPES': ('validate_parameter_types', bool),
            'FAIL_ON_WARNINGS': ('fail_on_warnings', bool),
            'GITHUB_ANNOTATIONS': ('github_annotations', bool),
            'JSON_OUTPUT': ('json_output', bool)
        }
        for env_key, (attr_name, type_func) in env_mappings.items():
            env_value = os.environ.get(env_prefix + env_key)
            if env_value is not None:
                try:
                    if type_func == bool:
                        converted_value = env_value.lower() in ('true', '1', 'yes', 'on')
                    else:
                        converted_value = type_func(env_value)
                    setattr(self, attr_name, converted_value)
                    self.logger.debug(f"Environment variable {env_prefix + env_key} set {attr_name} = {converted_value}")
                except ValueError as e:
                    self.logger.warning(f"Invalid environment variable {env_prefix + env_key}: {e}")

    def _apply_config_dict(self, config_dict: Dict[str, Any]) -> None:
        """Apply configuration from a dictionary (helper for multiple sources)."""
        # Merge known sections, then copy keys that match existing attributes.
        for section in ('validation', 'performance', 'rules', 'ci_cd'):
            section_data = config_dict.get(section)
            if isinstance(section_data, dict):
                config_dict = {**config_dict, **section_data}
        for key, value in config_dict.items():
            if not isinstance(value, dict) and hasattr(self, key):
                setattr(self, key, value)

    def get_config_summary(self) -> Dict[str, Any]:
        """Get a summary of current configuration."""
        return {
            'cache': {
                'enabled': self.cache_enabled,
                'ttl_hours': self.cache_ttl_hours,
                'max_size_mb': self.cache_max_size_mb,
                'dir': self.cache_dir
            },
            'performance': {
                'parallel_workers': self.parallel_workers,
                'incremental_mode': self.incremental_mode,
                'timeout_seconds': self.timeout_seconds
            },
            'rules': {
                'coverage_threshold': self.coverage_threshold,
                'docstring_formats': self.docstring_formats,
                'require_examples': self.require_examples,
                'validate_parameter_types': self.validate_parameter_types,
                'fail_on_warnings': self.fail_on_warnings
            },
            'ci_cd': {
                'github_annotations': self.github_annotations,
                'pre_commit_quick_mode': self.pre_commit_quick_mode,
                'staged_files_only': self.staged_files_only,
                'json_output': self.json_output
            }
        }
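

# Usage sketch for ValidationConfig (illustrative; the environment variable
# name follows the CLI_VALIDATION_ prefix handled in _load_from_env):
#
#     os.environ["CLI_VALIDATION_COVERAGE_THRESHOLD"] = "90"
#     config = ValidationConfig()          # picks up files, pyproject, then env
#     print(config.get_config_summary())   # coverage_threshold is now 90.0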


class PerformanceOptimizer:
    """
    Performance optimization features including parallel processing and progress reporting.

    Provides multi-threaded validation, memory optimization, and progress tracking
    for efficient processing of large codebases.
    """

    def __init__(self, config: ValidationConfig):
        self.config = config
        self.logger = logging.getLogger(__name__ + '.PerformanceOptimizer')

    def validate_files_parallel(self, file_paths: List[str], validation_func: Callable[[str], Any]) -> List[Any]:
        """
        Validate multiple files in parallel using a thread pool.

        Args:
            file_paths: List of file paths to validate
            validation_func: Function to call for each file

        Returns:
            List of validation results
        """
        if len(file_paths) <= 1 or self.config.parallel_workers <= 1:
            # Use sequential processing for single files or when parallel is disabled
            return [validation_func(file_path) for file_path in file_paths]
        results = []
        with ThreadPoolExecutor(max_workers=self.config.parallel_workers) as executor:
            # Submit all tasks
            future_to_file = {
                executor.submit(validation_func, file_path): file_path
                for file_path in file_paths
            }
            # Collect results in submission order with progress reporting
            completed = 0
            total = len(file_paths)
            for future in future_to_file:
                try:
                    result = future.result(timeout=self.config.timeout_seconds)
                    results.append(result)
                    completed += 1
                    if completed % 10 == 0 or completed == total:
                        self.logger.info(f"Validation progress: {completed}/{total} files completed")
                except Exception as e:
                    file_path = future_to_file[future]
                    self.logger.error(f"Validation failed for {file_path}: {e}")
                    # Add empty result to maintain order
                    results.append(None)
        return results

    def optimize_memory_usage(self) -> None:
        """Optimize memory usage by cleaning up caches and forcing garbage collection."""
        import gc
        # Force garbage collection
        collected = gc.collect()
        self.logger.debug(f"Memory optimization: collected {collected} objects")


class ASTAnalyzer:
    """Advanced AST analyzer for comprehensive code analysis."""

    def __init__(self):
        self.logger = logging.getLogger(__name__ + '.ASTAnalyzer')

    def extract_all_definitions(self, file_path: str) -> List[CodeDefinition]:
        """Extract all functions, methods, and classes from a Python file."""
        definitions = []
        try:
            with open(file_path, 'r', encoding='utf-8') as f:
                content = f.read()
            tree = ast.parse(content, filename=file_path)
            # Track method nodes so the walk below does not also count them
            # as top-level functions
            method_nodes = set()
            # Extract classes and their methods
            for node in ast.walk(tree):
                if isinstance(node, ast.ClassDef):
                    # Add class definition
                    class_docstring = ast.get_docstring(node)
                    definitions.append(CodeDefinition(
                        name=node.name,
                        type='class',
                        file_path=file_path,
                        line_number=node.lineno,
                        docstring=class_docstring
                    ))
                    # Add methods within the class
                    for item in node.body:
                        if isinstance(item, ast.FunctionDef):
                            method_nodes.add(item)
                            method_docstring = ast.get_docstring(item)
                            definitions.append(CodeDefinition(
                                name=item.name,
                                type='method',
                                file_path=file_path,
                                line_number=item.lineno,
                                docstring=method_docstring,
                                parent_class=node.name,
                                args=[arg.arg for arg in item.args.args],
                                is_cli_handler=self._is_cli_handler(item, content)
                            ))
                elif (isinstance(node, ast.FunctionDef)
                      and node not in method_nodes
                      and not self._is_nested_function(node, tree)):
                    # Add top-level function definitions
                    func_docstring = ast.get_docstring(node)
                    definitions.append(CodeDefinition(
                        name=node.name,
                        type='function',
                        file_path=file_path,
                        line_number=node.lineno,
                        docstring=func_docstring,
                        args=[arg.arg for arg in node.args.args],
                        is_cli_handler=self._is_cli_handler(node, content)
                    ))
        except Exception as e:
            self.logger.error(f"Failed to parse {file_path}: {e}")
        return definitions

    def _is_cli_handler(self, node: ast.FunctionDef, content: str) -> bool:
        """Detect if a function is likely a CLI command handler."""
        # Check for common CLI patterns
        cli_indicators = [
            'subparsers.add_parser',
            'add_parser',
            'set_defaults',
            'argparse',
            'parser.add_argument'
        ]
        # Get function source
        try:
            lines = content.split('\n')
            end_line = node.end_lineno if hasattr(node, 'end_lineno') else node.lineno + 20
            func_lines = lines[node.lineno - 1:end_line]
            func_source = '\n'.join(func_lines)
            return any(indicator in func_source for indicator in cli_indicators)
        except Exception:
            return False

    def _is_nested_function(self, node: ast.FunctionDef, tree: ast.AST) -> bool:
        """Check if a function is nested inside another function."""
        for parent in ast.walk(tree):
            if isinstance(parent, (ast.FunctionDef, ast.AsyncFunctionDef)) and parent != node:
                for child in ast.walk(parent):
                    if child == node:
                        return True
        return False

    def analyze_docstring_coverage(self, definitions: List[CodeDefinition]) -> CoverageReport:
        """Calculate comprehensive docstring coverage metrics."""
        total = len(definitions)
        documented = sum(1 for d in definitions if d.docstring and d.docstring.strip())
        undocumented = total - documented
        coverage_percentage = (documented / total * 100) if total > 0 else 100.0
        undocumented_items = [d for d in definitions if not d.docstring or not d.docstring.strip()]
        return CoverageReport(
            total_definitions=total,
            documented_definitions=documented,
            undocumented_definitions=undocumented,
            coverage_percentage=coverage_percentage,
            undocumented_items=undocumented_items
        )
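
    # Usage sketch for the analyzer (illustrative; "scripts/example.py" is a
    # hypothetical path):
    #
    #     analyzer = ASTAnalyzer()
    #     defs = analyzer.extract_all_definitions("scripts/example.py")
    #     report = analyzer.analyze_docstring_coverage(defs)
    #     print(f"{report.coverage_percentage:.1f}% documented")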

    def extract_function_signature(self, node: ast.FunctionDef) -> FunctionSignature:
        """
        Extract complete function signature including parameter and return types.

        Args:
            node: AST FunctionDef node

        Returns:
            FunctionSignature: Complete signature information
        """
        parameters = []
        # Extract parameters with type annotations
        for arg in node.args.args:
            param_type = None
            default_value = None
            # Get type annotation if present
            if arg.annotation:
                try:
                    param_type = ast.unparse(arg.annotation)
                except AttributeError:
                    # For Python < 3.9, use a simpler approach
                    param_type = self._annotation_to_string(arg.annotation)
            # Check for default values