ezlocalai/cli.py at main · DevXT-LLC/ezlocalai · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
946
947
948
949
950
951
952
953
954
955
956
957
958
959
960
961
962
963
964
965
966
967
968
969
970
971
972
973
974
975
976
977
978
979
980
981
982
983
984
985
986
987
988
989
990
991
992
993
994
995
996
997
998
999
1000
#!/usr/bin/env python3
"""
ezlocalai CLI - Run local AI inference with ease.

This CLI manages local LLM, TTS, STT, and image generation.
Supports Docker mode (default on x86_64) and native mode (ARM64/Jetson or --native flag).
Automatically detects GPU availability and architecture to pick the best mode.

Usage:
    ezlocalai start [--model MODEL] [--uri URI] [--api-key KEY] [--ngrok TOKEN] [--native]
    ezlocalai stop
    ezlocalai restart [--model MODEL] [--uri URI] [--api-key KEY] [--ngrok TOKEN] [--native]
    ezlocalai status
    ezlocalai logs [-f]
    ezlocalai prompt "your prompt" [-m MODEL] [-temp TEMPERATURE] [-tp TOP_P] [-image PATH]
"""

from __future__ import annotations

import argparse
import base64
import json
import os
import platform
import re
import shutil
import signal
import socket
import subprocess
import sys
import time
import urllib.error
import urllib.request
from pathlib import Path
from typing import Optional

# Version
__version__ = "1.0.5"

# Configuration
DOCKER_IMAGE = "joshxt/ezlocalai:latest"
DOCKER_IMAGE_CUDA = "ezlocalai:cuda"  # Built locally, not from DockerHub
DOCKER_IMAGE_ROCM = "ezlocalai:rocm"  # Built locally for AMD GPUs
DOCKER_IMAGE_JETSON = "ezlocalai:jetson"  # Built locally on Jetson ARM64+CUDA
CONTAINER_NAME = "ezlocalai"
DEFAULT_PORT = 8091
STATE_DIR = Path.home() / ".ezlocalai"
STATE_DIR.mkdir(parents=True, exist_ok=True)
ENV_FILE = STATE_DIR / ".env"
LOG_FILE = STATE_DIR / "ezlocalai.log"
PID_FILE = STATE_DIR / "ezlocalai.pid"
SOURCE_DIR_FILE = STATE_DIR / "source_dir"
REPO_URL = "https://github.com/DevXT-LLC/ezlocalai.git"
REPO_DIR = STATE_DIR / "repo"

# Cache for uv availability (checked once per process)
_uv_available: Optional[bool] = None


def get_local_ip() -> str:
    """Detect the local network IP address.

    Uses a UDP socket trick to find the IP of the interface that would
    route to the internet, without actually sending any data.
    Falls back to 'localhost' if detection fails.
    """
    try:
        s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        try:
            s.connect(("8.8.8.8", 80))
            ip = s.getsockname()[0]
        finally:
            s.close()
        return ip
    except Exception:
        return "localhost"


def _ensure_uv_installed() -> bool:
    """Ensure uv is installed. Installs it if missing.

    uv is a fast Python package manager (10-100x faster than pip).
    Used as a drop-in replacement for 'pip install' throughout the CLI.
    Falls back to pip if uv cannot be installed.

    Returns True if uv is available.
    """
    global _uv_available
    if _uv_available is not None:
        return _uv_available

    if shutil.which("uv"):
        _uv_available = True
        return True

    # Try installing uv via pip (fastest, doesn't need curl/sudo)
    python = sys.executable
    result = subprocess.run(
        [python, "-m", "pip", "install", "uv", "-q"],
        capture_output=True,
        check=False,
    )
    if result.returncode == 0 and shutil.which("uv"):
        _uv_available = True
        print("⚡ Installed uv for faster package management")
        return True

    # Try the official installer as fallback
    try:
        result = subprocess.run(
            ["sh", "-c", "curl -LsSf https://astral.sh/uv/install.sh | sh"],
            capture_output=True,
            check=False,
        )
        # uv installs to ~/.local/bin or ~/.cargo/bin
        for uv_dir in [
            Path.home() / ".local" / "bin",
            Path.home() / ".cargo" / "bin",
        ]:
            uv_path = uv_dir / "uv"
            if uv_path.exists():
                if str(uv_dir) not in os.environ.get("PATH", ""):
                    os.environ["PATH"] = f"{uv_dir}:{os.environ.get('PATH', '')}"
                _uv_available = True
                print("⚡ Installed uv for faster package management")
                return True
    except Exception:
        pass

    _uv_available = False
    return False


def _get_pip_cmd(python: str = None, subcommand: str = None) -> list[str]:
    """Get the pip command prefix with optional subcommand.

    Returns ['uv', 'pip'] or [python, '-m', 'pip'] as the base.
    If subcommand is given (e.g. 'install', 'uninstall'), it is appended
    along with --python for uv (which requires it after the subcommand).

    Examples:
        _get_pip_cmd(python)              -> ['uv', 'pip'] or [python, '-m', 'pip']
        _get_pip_cmd(python, 'install')   -> ['uv', 'pip', 'install', '--python', python]
                                          or [python, '-m', 'pip', 'install']
    """
    if python is None:
        python = sys.executable
    use_uv = _uv_available or _ensure_uv_installed()
    if use_uv:
        cmd = ["uv", "pip"]
        if subcommand:
            cmd.append(subcommand)
            cmd.extend(["--python", python])
        return cmd
    cmd = [python, "-m", "pip"]
    if subcommand:
        cmd.append(subcommand)
    return cmd


def _pip_install(
    packages: list[str],
    python: str = None,
    extra_args: list[str] = None,
    **kwargs,
) -> subprocess.CompletedProcess:
    """Install packages using uv (fast) or pip (fallback).

    Args:
        packages: List of package specs (e.g. ['torch', 'numpy>=1.20'])
        python: Python executable path. Defaults to sys.executable.
        extra_args: Additional args like ['--no-deps', '--index-url', '...']
        **kwargs: Passed to subprocess.run (e.g. capture_output, cwd, env)
    """
    if python is None:
        python = sys.executable
    cmd = _get_pip_cmd(python, "install") + packages
    if extra_args:
        cmd.extend(extra_args)
    kwargs.setdefault("check", False)
    return subprocess.run(cmd, **kwargs)


def _pip_uninstall(
    packages: list[str],
    python: str = None,
    **kwargs,
) -> subprocess.CompletedProcess:
    """Uninstall packages using uv (fast) or pip (fallback)."""
    if python is None:
        python = sys.executable
    cmd = _get_pip_cmd(python, "uninstall") + packages + ["-y"]
    kwargs.setdefault("check", False)
    return subprocess.run(cmd, **kwargs)


def is_arm64() -> bool:
    """Check if running on ARM64/aarch64 architecture."""
    machine = platform.machine().lower()
    return machine in ("aarch64", "arm64")


def is_jetson() -> bool:
    """Check if running on an NVIDIA Jetson device."""
    # Check for Jetson-specific files
    if Path("/etc/nv_tegra_release").exists():
        return True
    try:
        model_path = Path("/proc/device-tree/model")
        if model_path.exists():
            model = model_path.read_text(errors="ignore").lower()
            if "jetson" in model or "tegra" in model:
                return True
    except Exception:
        pass
    return False


def has_jetson_cuda() -> bool:
    """Check if CUDA is available on Jetson (via JetPack/tegrastats)."""
    # Jetson doesn't have nvidia-smi, check for CUDA libraries
    cuda_paths = [
        Path("/usr/local/cuda"),
        Path("/usr/lib/aarch64-linux-gnu/libcuda.so"),
    ]
    if any(p.exists() for p in cuda_paths):
        return True
    # Check tegrastats
    if shutil.which("tegrastats"):
        return True
    return False


def _detect_jetpack_version() -> Optional[str]:
    """Detect the JetPack version on a Jetson device.

    Returns a version string like '60' (JetPack 6.0) or '61' (JetPack 6.1)
    suitable for constructing NVIDIA wheel download URLs. Returns None if
    detection fails.
    """
    # Method 1: dpkg query for nvidia-jetpack meta-package
    try:
        result = subprocess.run(
            ["dpkg-query", "--showformat=${Version}", "-W", "nvidia-jetpack"],
            capture_output=True,
            text=True,
            check=False,
        )
        if result.returncode == 0 and result.stdout.strip():
            ver = result.stdout.strip()
            # Parse "6.0-b123" or "6.1.1-b456" -> major.minor
            match = re.match(r"(\d+)\.(\d+)", ver)
            if match:
                return f"{match.group(1)}{match.group(2)}"
    except FileNotFoundError:
        pass

    # Method 2: dpkg query for nvidia-l4t-core (L4T version -> JetPack mapping)
    try:
        result = subprocess.run(
            ["dpkg-query", "--showformat=${Version}", "-W", "nvidia-l4t-core"],
            capture_output=True,
            text=True,
            check=False,
        )
        if result.returncode == 0 and result.stdout.strip():
            ver = result.stdout.strip()
            match = re.match(r"(\d+)\.(\d+)", ver)
            if match:
                l4t_major, l4t_minor = int(match.group(1)), int(match.group(2))
                # L4T R36.x = JetPack 6.x
                if l4t_major == 36:
                    if l4t_minor >= 4:
                        return "61"
                    return "60"
                # L4T R35.x = JetPack 5.1
                if l4t_major == 35:
                    return "51"
                # L4T R34.x = JetPack 5.0
                if l4t_major == 34:
                    return "50"
    except FileNotFoundError:
        pass

    # Method 3: Use CUDA version as a rough proxy
    cuda_ver = get_cuda_version()
    if cuda_ver:
        cuda_major_minor = tuple(int(x) for x in cuda_ver.split(".")[:2])
        if cuda_major_minor >= (12, 6):
            return "61"
        if cuda_major_minor >= (12, 2):
            return "60"
        if cuda_major_minor >= (11, 4):
            return "51"

    return None


def _install_jetson_torch(python: str) -> bool:
    """Install CUDA-enabled PyTorch on Jetson.

    PyPI's torch package is CPU-only on aarch64. This function tries multiple
    sources to find a CUDA-enabled wheel that matches the device's JetPack
    version and Python version:

    1. NVIDIA's official redistribution index (cp38/cp310 only typically)
    2. PyTorch's own CUDA index (https://download.pytorch.org/whl/cuXXX)
    3. Jetson AI Lab community index (https://pypi.jetson-ai-lab.dev/simple/)

    Must be called BEFORE installing other requirements so that transitive
    torch dependencies don't pull in the CPU-only version from PyPI.

    Returns True if a CUDA-enabled torch was installed, False otherwise.
    """
    from html.parser import HTMLParser

    jp_ver = _detect_jetpack_version()
    if not jp_ver:
        print(
            "⚠️  Could not detect JetPack version — "
            "skipping NVIDIA PyTorch install (torch will be CPU-only)"
        )
        return False

    py_tag = f"cp{sys.version_info.major}{sys.version_info.minor}"
    print(f"🔍 Detected JetPack {jp_ver[0]}.{jp_ver[1:]}, Python {py_tag}")

    # --- Source 1: NVIDIA official wheel index ---
    base_url = (
        f"https://developer.download.nvidia.com/compute/redist/"
        f"jp/v{jp_ver}/pytorch/"
    )
    print(f"📦 Checking NVIDIA PyTorch wheels at {base_url} ...")

    class _WheelLinkParser(HTMLParser):
        def __init__(self):
            super().__init__()
            self.wheels: list[str] = []

        def handle_starttag(self, tag, attrs):
            if tag == "a":
                for attr_name, attr_val in attrs:
                    if attr_name == "href" and attr_val and attr_val.endswith(".whl"):
                        self.wheels.append(attr_val)

    nvidia_wheel_url = None
    try:
        req = urllib.request.Request(base_url, headers={"User-Agent": "ezlocalai"})
        with urllib.request.urlopen(req, timeout=30) as resp:
            html = resp.read().decode("utf-8", errors="replace")
        parser = _WheelLinkParser()
        parser.feed(html)
        matching = [
            w for w in parser.wheels if f"-{py_tag}-" in w and "linux_aarch64" in w
        ]
        if matching:
            matching.sort()
            wheel_name = matching[-1]
            nvidia_wheel_url = (
                wheel_name if wheel_name.startswith("http") else base_url + wheel_name
            )
            print(f"   Found NVIDIA wheel: {wheel_name}")
        else:
            available_tags = set()
            for w in parser.wheels:
                m = re.search(r"-(cp\d+)-", w)
                if m:
                    available_tags.add(m.group(1))
            print(
                f"   No {py_tag} wheel on NVIDIA index "
                f"(available: {', '.join(sorted(available_tags)) or 'none'})"
            )
    except Exception as exc:
        print(f"   NVIDIA index unavailable: {exc}")

    if nvidia_wheel_url:
        print(f"⬇️  Installing NVIDIA PyTorch: {nvidia_wheel_url.split('/')[-1]}")
        _pip_uninstall(["torch"], python=python, capture_output=True)
        result = _pip_install(
            [nvidia_wheel_url], python=python, extra_args=["--no-cache-dir"]
        )
        if result.returncode == 0 and _verify_torch_cuda(python):
            return True
        print("   NVIDIA wheel installation failed, trying alternatives...")

    # --- Source 2: PyTorch's official CUDA index ---
    # Map JetPack CUDA version to PyTorch's index URL
    cuda_ver = get_cuda_version()
    cuda_index_tag = None
    if cuda_ver:
        cv = tuple(int(x) for x in cuda_ver.split(".")[:2])
        # Match to closest PyTorch CUDA index
        if cv >= (12, 6):
            cuda_index_tag = "cu126"
        elif cv >= (12, 4):
            cuda_index_tag = "cu124"
        elif cv >= (12, 1):
            cuda_index_tag = "cu121"
        elif cv >= (11, 8):
            cuda_index_tag = "cu118"
        elif cv >= (11, 4):
            cuda_index_tag = "cu118"  # closest available

    if cuda_index_tag:
        pytorch_index = f"https://download.pytorch.org/whl/{cuda_index_tag}"
        print(f"📦 Trying PyTorch CUDA index ({cuda_index_tag})...")
        _pip_uninstall(["torch"], python=python, capture_output=True)
        result = _pip_install(
            ["torch"],
            python=python,
            extra_args=["--index-url", pytorch_index, "--no-cache-dir"],
            capture_output=True,
        )
        if result.returncode == 0 and _verify_torch_cuda(python):
            return True
        print(f"   No matching aarch64+CUDA wheel on PyTorch {cuda_index_tag} index")

    # --- Source 3: Jetson AI Lab community index ---
    jetson_index = "https://pypi.jetson-ai-lab.dev/simple/"
    print(f"📦 Trying Jetson AI Lab community index...")
    _pip_uninstall(["torch"], python=python, capture_output=True)
    result = _pip_install(
        ["torch"],
        python=python,
        extra_args=["--extra-index-url", jetson_index, "--no-cache-dir"],
        capture_output=True,
    )
    if result.returncode == 0 and _verify_torch_cuda(python):
        return True
    print("   No matching wheel on Jetson AI Lab index")

    # --- Source 4: PyPI default (CPU-only, last resort) ---
    print("⚠️  No CUDA-enabled PyTorch wheel found for this platform.")
    print("   Installing CPU-only torch from PyPI as fallback...")
    _pip_uninstall(["torch"], python=python, capture_output=True)
    result = _pip_install(
        ["torch"], python=python, extra_args=["--no-cache-dir"], capture_output=True
    )
    if result.returncode == 0:
        print("   torch installed (CPU-only). GPU inference will be slower.")
        return True

    print("   ❌ Failed to install torch from any source")
    return False


def _verify_torch_cuda(python: str) -> bool:
    """Verify that torch is installed and CUDA-enabled."""
    result = subprocess.run(
        [
            python,
            "-c",
            (
                "import torch; "
                "cuda = torch.cuda.is_available(); "
                "print('CUDA available:', cuda); "
                "print('GPU:', torch.cuda.get_device_name(0) if cuda else 'none')"
            ),
        ],
        capture_output=True,
        text=True,
        check=False,
    )
    if result.returncode == 0 and "CUDA available: True" in result.stdout:
        print(f"   ✅ {result.stdout.strip()}")
        return True
    if result.returncode == 0:
        # torch imported but CUDA not available
        print(f"   ⚠️  {result.stdout.strip()}")
    else:
        stderr = result.stderr.strip().split("\n")[-1] if result.stderr else "unknown"
        print(f"   ⚠️  torch import failed: {stderr}")
    return False


def should_use_native_mode(force_native: bool = False) -> bool:
    """Determine if native mode should be used instead of Docker.

    Native mode is used when:
    - --native flag is passed
    - Docker is not available or not running
    - On Jetson without Docker + nvidia-container-runtime

    On Jetson with Docker available, we prefer Docker mode using the
    locally-built Jetson image (avoids ARM64 dependency hell).
    """
    if force_native:
        return True
    if not is_tool_installed("docker") or not is_docker_running():
        return True
    # Jetson with Docker: use Docker mode if nvidia-container-runtime is available
    if is_arm64() and is_jetson():
        # Check for NVIDIA container runtime (required for GPU access in Docker)
        result = subprocess.run(
            ["docker", "info"],
            capture_output=True,
            text=True,
            check=False,
        )
        if result.returncode == 0 and "nvidia" in result.stdout.lower():
            return False  # Docker mode with Jetson image
        # No NVIDIA runtime — fall back to native
        print("⚠️  Docker found but no NVIDIA container runtime.")
        print("   Install with: sudo apt-get install nvidia-container")
        print("   Falling back to native mode...")
        return True
    # Non-Jetson ARM64 without a Docker image
    if is_arm64():
        return True
    return False


def is_ezlocalai_folder(folder: Path) -> bool:
    """Check if the given folder is the ezlocalai source folder.

    Detects the ezlocalai folder by checking for key files that exist
    in the source repository but not in typical installation locations.
    """
    key_files = [
        "docker-compose.yml",
        "docker-compose-cuda.yml",
        "Dockerfile",
        "cuda.Dockerfile",
        "app.py",  # Main app file
    ]
    key_dirs = [
        "ezlocalai",  # Python module folder
    ]
    files_exist = all((folder / f).exists() for f in key_files)
    dirs_exist = all((folder / d).is_dir() for d in key_dirs)
    return files_exist and dirs_exist


def get_ezlocalai_source_dir() -> Optional[Path]:
    """Get the ezlocalai source directory.

    Checks (in order):
    1. Current working directory (if it's the ezlocalai folder)
    2. Persisted source directory from ~/.ezlocalai/source_dir

    When found via cwd, the path is persisted so future invocations
    from other directories still find the source .env and compose files.
    """
    cwd = Path.cwd()
    if is_ezlocalai_folder(cwd):
        # Persist for future use from other directories
        try:
            SOURCE_DIR_FILE.write_text(str(cwd), encoding="utf-8")
        except OSError:
            pass
        return cwd

    # Check persisted path
    if SOURCE_DIR_FILE.exists():
        try:
            saved = Path(SOURCE_DIR_FILE.read_text(encoding="utf-8").strip())
            if saved.exists() and is_ezlocalai_folder(saved):
                return saved
        except (OSError, ValueError):
            pass

    return None


class CLIError(RuntimeError):
    """Raised for recoverable CLI errors."""


def is_tool_installed(tool: str) -> bool:
    """Check if a command-line tool is installed."""
    try:
        result = subprocess.run(
            [tool, "--version"],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            check=False,
        )
        return result.returncode == 0
    except FileNotFoundError:
        return False


def is_docker_running() -> bool:
    """Check if Docker daemon is running."""
    try:
        result = subprocess.run(
            ["docker", "info"],
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
            check=False,
        )
        return result.returncode == 0
    except FileNotFoundError:
        return False


def has_nvidia_gpu() -> bool:
    """Check if NVIDIA GPU is available."""
    try:
        result = subprocess.run(
            ["nvidia-smi", "--query-gpu=name", "--format=csv,noheader"],
            capture_output=True,
            text=True,
            check=False,
        )
        if result.returncode == 0 and result.stdout.strip():
            return True
    except FileNotFoundError:
        pass
    return False


def has_nvidia_container_toolkit() -> bool:
    """Check if NVIDIA Container Toolkit is installed."""
    try:
        result = subprocess.run(
            [
                "docker",
                "run",
                "--rm",
                "--gpus",
                "all",
                "nvidia/cuda:12.8.1-base-ubuntu24.04",
                "nvidia-smi",
            ],
            capture_output=True,
            text=True,
            check=False,
            timeout=60,
        )
        return result.returncode == 0
    except (FileNotFoundError, subprocess.TimeoutExpired):
        return False


def get_nvidia_gpu_info() -> Optional[str]:
    """Get NVIDIA GPU information."""
    try:
        result = subprocess.run(
            ["nvidia-smi", "--query-gpu=name,memory.total", "--format=csv,noheader"],
            capture_output=True,
            text=True,
            check=False,
        )
        if result.returncode == 0:
            return result.stdout.strip()
    except FileNotFoundError:
        pass
    return None


def has_amd_gpu() -> bool:
    """Check if AMD GPU is available via ROCm."""
    try:
        result = subprocess.run(
            ["rocm-smi", "--showproductname"],
            capture_output=True,
            text=True,
            check=False,
        )
        if result.returncode == 0 and result.stdout.strip():
            return True
    except FileNotFoundError:
        pass
    # Also check for /dev/kfd which indicates ROCm-capable hardware
    if Path("/dev/kfd").exists() and Path("/dev/dri").exists():
        return True
    return False


def has_rocm_support() -> bool:
    """Check if ROCm is properly installed and functional."""
    try:
        result = subprocess.run(
            ["rocminfo"],
            capture_output=True,
            text=True,
            check=False,
        )
        if result.returncode == 0 and "Agent" in result.stdout:
            return True
    except FileNotFoundError:
        pass
    return False


def get_amd_gpu_info() -> Optional[str]:
    """Get AMD GPU information."""
    try:
        result = subprocess.run(
            ["rocm-smi", "--showproductname"],
            capture_output=True,
            text=True,
            check=False,
        )
        if result.returncode == 0:
            # Parse the output to get GPU name
            for line in result.stdout.splitlines():
                if "GPU" in line or "Card" in line:
                    return line.strip()
            return result.stdout.strip()
    except FileNotFoundError:
        pass
    # Fallback: try lspci
    try:
        result = subprocess.run(
            ["lspci"],
            capture_output=True,
            text=True,
            check=False,
        )
        if result.returncode == 0:
            for line in result.stdout.splitlines():
                if "VGA" in line and ("AMD" in line or "Radeon" in line):
                    return line.split(":")[-1].strip()
    except FileNotFoundError:
        pass
    return None


def prompt_user(prompt: str, default: str = "") -> str:
    """Prompt user for input with optional default."""
    if default:
        user_input = input(f"{prompt} (default: {default}): ").strip()
    else:
        user_input = input(f"{prompt}: ").strip()
    return user_input if user_input else default


def _ensure_git_access(repo_dir: Path) -> None:
    """Ensure git operations will work in the given directory.

    Fixes two common issues:
    1. 'dubious ownership' — when repo was cloned with sudo but run as a normal user.
       Adds the directory to git's global safe.directory list.
    2. File ownership — chowns the directory to the current user if not writable.
    """
    if not (repo_dir / ".git").exists():
        return

    # Add to safe.directory if not already there
    result = subprocess.run(
        ["git", "config", "--global", "--get-all", "safe.directory"],
        capture_output=True,
        text=True,
        check=False,
    )
    safe_dirs = result.stdout.strip().splitlines() if result.returncode == 0 else []
    repo_str = str(repo_dir)
    if repo_str not in safe_dirs:
        subprocess.run(
            ["git", "config", "--global", "--add", "safe.directory", repo_str],
            capture_output=True,
            check=False,
        )

    # Fix ownership if directory is not writable by current user
    if not os.access(repo_dir, os.W_OK):
        import getpass

        user = getpass.getuser()
        subprocess.run(
            ["sudo", "chown", "-R", f"{user}:{user}", repo_str],
            capture_output=True,
            check=False,
        )


def clone_or_update_repo() -> Path:
    """Clone or update the ezlocalai repository for building CUDA image.

    Returns the path to the ezlocalai source directory (either local or cloned).
    """
    # First check if we're running from within the ezlocalai folder
    local_source = get_ezlocalai_source_dir()
    if local_source:
        print("📦 Using local ezlocalai source folder...")
        print(f"   Path: {local_source}")
        _ensure_git_access(local_source)
        return local_source

    # Fall back to cloning/updating the repo
    if REPO_DIR.exists():
        _ensure_git_access(REPO_DIR)
        print("📦 Updating ezlocalai repository...")
        result = subprocess.run(
            ["git", "pull"],
            cwd=REPO_DIR,
            capture_output=True,
            text=True,
            check=False,
        )
        if result.returncode != 0:
            print(f"⚠️  Failed to update repo: {result.stderr}")
            # Try to continue with existing repo
            return REPO_DIR
        print("✅ Repository updated")
        return REPO_DIR
    else:
        print("📦 Cloning ezlocalai repository...")
        result = subprocess.run(
            ["git", "clone", REPO_URL, str(REPO_DIR)],
            capture_output=True,
            text=True,
            check=False,
        )
        if result.returncode != 0:
            print(f"❌ Failed to clone repo: {result.stderr}")
            return None
        print("✅ Repository cloned")
        return REPO_DIR


def build_cuda_image() -> bool:
    """Build the CUDA Docker image from source using docker-compose."""
    source_dir = clone_or_update_repo()
    if not source_dir:
        return False

    print("\n🔨 Building CUDA image (this may take 10-20 minutes)...")
    print("   Building from: docker-compose-cuda.yml")
    print(f"   Source directory: {source_dir}")

    # Build using docker-compose (handles complex builds better)
    result = subprocess.run(
        ["docker", "compose", "-f", "docker-compose-cuda.yml", "build"],
        cwd=source_dir,
        check=False,
    )

    if result.returncode != 0:
        print("❌ Failed to build CUDA image")
        return False

    # Tag the image with our expected name
    # docker-compose names it based on folder name
    print("   Tagging image as ezlocalai:cuda...")

    # Determine the expected image name based on folder
    folder_name = source_dir.name
    expected_names = [
        f"{folder_name}-ezlocalai:latest",
        "repo-ezlocalai:latest",
        f"{folder_name}_ezlocalai:latest",
    ]

    tagged = False
    for expected_name in expected_names:
        tag_result = subprocess.run(
            ["docker", "tag", expected_name, DOCKER_IMAGE_CUDA],
            capture_output=True,
            check=False,
        )
        if tag_result.returncode == 0:
            tagged = True
            break

    if not tagged:
        print("⚠️  Could not tag image, trying to find it...")
        # List images and try to find one that matches
        list_result = subprocess.run(
            ["docker", "images", "--format", "{{.Repository}}:{{.Tag}}"],
            capture_output=True,
            text=True,
            check=False,
        )
        if list_result.returncode == 0:
            for line in list_result.stdout.splitlines():
                if "ezlocalai" in line.lower() and "cuda" not in line:
                    subprocess.run(
                        ["docker", "tag", line.strip(), DOCKER_IMAGE_CUDA],
                        check=False,
                    )
                    tagged = True
                    break

    print("✅ CUDA image built successfully")
    return True


def cuda_image_exists() -> bool:
    """Check if the CUDA image exists locally."""
    result = subprocess.run(
        ["docker", "images", "-q", DOCKER_IMAGE_CUDA],
        capture_output=True,
        text=True,
        check=False,
    )
    return bool(result.stdout.strip())


def rocm_image_exists() -> bool:
    """Check if the ROCm image exists locally."""
    result = subprocess.run(
        ["docker", "images", "-q", DOCKER_IMAGE_ROCM],
        capture_output=True,
        text=True,
        check=False,
    )
    return bool(result.stdout.strip())


def jetson_image_exists() -> bool:
    """Check if the Jetson image exists locally."""
    result = subprocess.run(
        ["docker", "images", "-q", DOCKER_IMAGE_JETSON],
        capture_output=True,
        text=True,
        check=False,
    )
    return bool(result.stdout.strip())


def build_jetson_image() -> bool:
    """Build the Jetson Docker image from source using docker-compose.

    Must be run ON the Jetson itself (ARM64 native build).
    Detects JetPack version and CUDA architecture automatically.
    """
    source_dir = clone_or_update_repo()
    if not source_dir:
        return False

    # Auto-detect L4T tag and CUDA arch
    l4t_tag = "r36.4.0"  # Default: JetPack 6
    cuda_arch = "87"  # Default: Orin

    jp_ver = _detect_jetpack_version()
    if jp_ver:
        jp_major = int(jp_ver[0])
        if jp_major == 5:
            l4t_tag = "r35.4.1"  # JetPack 5.1
        elif jp_major == 6:
            l4t_tag = "r36.4.0"  # JetPack 6

    detected_arch = detect_jetson_cuda_arch()
    if detected_arch:
        cuda_arch = detected_arch

    print(f"\n🔨 Building Jetson image (this may take 15-30 minutes)...")
    print(f"   Building from: docker-compose-jetson.yml")
    print(f"   L4T base: {l4t_tag}, CUDA arch: {cuda_arch}")
    print(f"   Source directory: {source_dir}")

    build_env = os.environ.copy()
    build_env["L4T_TAG"] = l4t_tag
    build_env["CUDA_ARCH"] = cuda_arch

    result = subprocess.run(
        ["docker", "compose", "-f", "docker-compose-jetson.yml", "build"],
        cwd=source_dir,
        env=build_env,
        check=False,
    )

    if result.returncode != 0:
        print("❌ Failed to build Jetson image")
        return False

    # Tag the image with our expected name
    print("   Tagging image as ezlocalai:jetson...")
    folder_name = source_dir.name
    expected_names = [
        f"{folder_name}-ezlocalai:latest",
        "repo-ezlocalai:latest",
        f"{folder_name}_ezlocalai:latest",
    ]

    tagged = False
    for expected_name in expected_names:
        tag_result = subprocess.run(
            ["docker", "tag", expected_name, DOCKER_IMAGE_JETSON],
            capture_output=True,
            check=False,
        )
        if tag_result.returncode == 0:
            tagged = True
            break

    if not tagged:
        list_result = subprocess.run(
            ["docker", "images", "--format", "{{.Repository}}:{{.Tag}}"],
            capture_output=True,
            text=True,
            check=False,
        )
        if list_result.returncode == 0:
            for line in list_result.stdout.splitlines():
                if "ezlocalai" in line.lower():
                    subprocess.run(
                        ["docker", "tag", line.strip(), DOCKER_IMAGE_JETSON],
                        check=False,
                    )
                    tagged = True
                    break

    print("✅ Jetson image built successfully")
    return True