From 559c67a0e864f6f8f96a98fe6c29fa523a0f01ae Mon Sep 17 00:00:00 2001
From: gogurtenjoyer <36354352+gogurtenjoyer@users.noreply.github.com>
Date: Mon, 3 Nov 2025 16:45:10 -0500
Subject: [PATCH 1/7] Wrap GGUF loader for context managed close()

Wrap gguf.GGUFReader in a context manager when loading memory-mapped
GGUF files, so that they are closed automatically once they are no
longer needed. This should prevent the 'file in use in another
process' errors on Windows.
---
 invokeai/backend/quantization/gguf/loaders.py | 50 ++++++++++++++-----
 1 file changed, 38 insertions(+), 12 deletions(-)

diff --git a/invokeai/backend/quantization/gguf/loaders.py b/invokeai/backend/quantization/gguf/loaders.py
index 178c0508466..f4acc4c28ed 100644
--- a/invokeai/backend/quantization/gguf/loaders.py
+++ b/invokeai/backend/quantization/gguf/loaders.py
@@ -1,22 +1,48 @@
+import gc
 from pathlib import Path
 
 import gguf
 import torch
 
+from invokeai.backend.util.logging import InvokeAILogger
 from invokeai.backend.quantization.gguf.ggml_tensor import GGMLTensor
 from invokeai.backend.quantization.gguf.utils import TORCH_COMPATIBLE_QTYPES
 
+logger = InvokeAILogger.get_logger()
+
+class WrappedGGUFReader:
+    """Wrapper around GGUFReader that adds a close() method."""
+    
+    def __init__(self, path: Path):
+        self.reader = gguf.GGUFReader(path)
+    
+    def __enter__(self):
+        return self.reader
+    
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        self.close()
+        return False
+    
+    def close(self):
+        """Explicitly close the memory-mapped file."""
+        if hasattr(self.reader, 'data') and hasattr(self.reader.data, '_mmap'):
+            try:
+                self.reader.data._mmap.close()
+            except (AttributeError, OSError, ValueError) as e:
+                logger.warning(f"Wasn't able to close GGUF memory map: {e}")
+        del self.reader
+        gc.collect()
 
 
-def gguf_sd_loader(path: Path, compute_dtype: torch.dtype) -> dict[str, GGMLTensor]:
-    reader = gguf.GGUFReader(path)
-    sd: dict[str, GGMLTensor] = {}
-    for tensor in reader.tensors:
-        torch_tensor = torch.from_numpy(tensor.data)
-        shape = torch.Size(tuple(int(v) for v in reversed(tensor.shape)))
-        if tensor.tensor_type in TORCH_COMPATIBLE_QTYPES:
-            torch_tensor = torch_tensor.view(*shape)
-        sd[tensor.name] = GGMLTensor(
-            torch_tensor, ggml_quantization_type=tensor.tensor_type, tensor_shape=shape, compute_dtype=compute_dtype
-        )
-    return sd
+def gguf_sd_loader(path: Path, compute_dtype: torch.dtype) -> dict[str, GGMLTensor]:
+    with WrappedGGUFReader(path) as reader:
+        sd: dict[str, GGMLTensor] = {}
+        for tensor in reader.tensors:
+            torch_tensor = torch.from_numpy(tensor.data)
+            shape = torch.Size(tuple(int(v) for v in reversed(tensor.shape)))
+            if tensor.tensor_type in TORCH_COMPATIBLE_QTYPES:
+                torch_tensor = torch_tensor.view(*shape)
+            sd[tensor.name] = GGMLTensor(
+                torch_tensor, ggml_quantization_type=tensor.tensor_type, tensor_shape=shape, compute_dtype=compute_dtype
+            )
+        return sd
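Note on patch 1: the wrapper behaves like any other context manager. A
minimal usage sketch, assuming a GGUF checkpoint at the hypothetical
path below:

    from pathlib import Path

    from invokeai.backend.quantization.gguf.loaders import WrappedGGUFReader

    # __enter__ hands back the underlying gguf.GGUFReader; __exit__ calls
    # close(), which releases the memory map and drops the reader reference.
    with WrappedGGUFReader(Path("flux1-dev-Q4_0.gguf")) as reader:
        for tensor in reader.tensors:
            print(tensor.name, tensor.tensor_type)
    # After the block exits, the mapped file handle has been released, so
    # Windows no longer reports the file as in use when it is moved or
    # deleted.
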
From 0673826417120b42b7f33add50f6a020af9e66a2 Mon Sep 17 00:00:00 2001
From: gogurtenjoyer <36354352+gogurtenjoyer@users.noreply.github.com>
Date: Mon, 3 Nov 2025 16:48:37 -0500
Subject: [PATCH 2/7] Additional check for cached state_dict

Add an additional check for a cached state_dict, since path is now
optional - this should stop the model manager from missing the cache
and causing the resulting memory errors.
---
 invokeai/backend/model_manager/model_on_disk.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/invokeai/backend/model_manager/model_on_disk.py b/invokeai/backend/model_manager/model_on_disk.py
index a77853c8f3d..284c4998589 100644
--- a/invokeai/backend/model_manager/model_on_disk.py
+++ b/invokeai/backend/model_manager/model_on_disk.py
@@ -84,6 +84,9 @@ def load_state_dict(self, path: Optional[Path] = None) -> StateDict:
 
         path = self.resolve_weight_file(path)
 
+        if path in self._state_dict_cache:
+            return self._state_dict_cache[path]
+
         with SilenceWarnings():
             if path.suffix.endswith((".ckpt", ".pt", ".pth", ".bin")):
                 scan_result = scan_file_path(path)
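Note on patch 2: the three added lines short-circuit load_state_dict
when the weights were already read. A simplified sketch of that caching
behaviour, using a hypothetical stand-in class (the real ModelOnDisk
resolves and scans the weight file before loading):

    from pathlib import Path
    from typing import Any, Dict, Optional

    StateDict = Dict[str, Any]

    class ModelOnDiskSketch:
        """Stand-in that models only the state_dict cache."""

        def __init__(self, weight_file: Path) -> None:
            self._weight_file = weight_file
            self._state_dict_cache: Dict[Path, StateDict] = {}

        def resolve_weight_file(self, path: Optional[Path]) -> Path:
            # Since `path` is optional, repeat calls without an argument
            # resolve to the same file on disk.
            return path or self._weight_file

        def load_state_dict(self, path: Optional[Path] = None) -> StateDict:
            path = self.resolve_weight_file(path)
            # The fix: return the cached entry instead of loading the
            # same weights into memory a second time.
            if path in self._state_dict_cache:
                return self._state_dict_cache[path]
            sd: StateDict = {}  # placeholder for the real torch.load/scan
            self._state_dict_cache[path] = sd
            return sd
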
From f8c67a40c8f823c5aeb7e42439b8bdb82f8674d6 Mon Sep 17 00:00:00 2001
From: gogurtenjoyer <36354352+gogurtenjoyer@users.noreply.github.com>
Date: Mon, 3 Nov 2025 17:30:20 -0500
Subject: [PATCH 3/7] Appease ruff

---
 invokeai/backend/quantization/gguf/loaders.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/invokeai/backend/quantization/gguf/loaders.py b/invokeai/backend/quantization/gguf/loaders.py
index f4acc4c28ed..2056ee726d2 100644
--- a/invokeai/backend/quantization/gguf/loaders.py
+++ b/invokeai/backend/quantization/gguf/loaders.py
@@ -4,25 +4,25 @@
 import gguf
 import torch
 
-from invokeai.backend.util.logging import InvokeAILogger
 from invokeai.backend.quantization.gguf.ggml_tensor import GGMLTensor
 from invokeai.backend.quantization.gguf.utils import TORCH_COMPATIBLE_QTYPES
+from invokeai.backend.util.logging import InvokeAILogger
 
 logger = InvokeAILogger.get_logger()
 
 class WrappedGGUFReader:
     """Wrapper around GGUFReader that adds a close() method."""
-    
+
     def __init__(self, path: Path):
         self.reader = gguf.GGUFReader(path)
-    
+
     def __enter__(self):
         return self.reader
-    
+
     def __exit__(self, exc_type, exc_val, exc_tb):
         self.close()
         return False
-    
+
     def close(self):
         """Explicitly close the memory-mapped file."""
         if hasattr(self.reader, 'data') and hasattr(self.reader.data, '_mmap'):

From 7e83cf42a00d1ab67ad5afbfaa69315d55732a50 Mon Sep 17 00:00:00 2001
From: gogurtenjoyer <36354352+gogurtenjoyer@users.noreply.github.com>
Date: Mon, 3 Nov 2025 18:11:18 -0500
Subject: [PATCH 4/7] Further ruff appeasement

---
 invokeai/backend/quantization/gguf/loaders.py | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/invokeai/backend/quantization/gguf/loaders.py b/invokeai/backend/quantization/gguf/loaders.py
index 2056ee726d2..7f3435e8399 100644
--- a/invokeai/backend/quantization/gguf/loaders.py
+++ b/invokeai/backend/quantization/gguf/loaders.py
@@ -3,13 +3,13 @@
 
 import gguf
 import torch
-
 from invokeai.backend.quantization.gguf.ggml_tensor import GGMLTensor
 from invokeai.backend.quantization.gguf.utils import TORCH_COMPATIBLE_QTYPES
 from invokeai.backend.util.logging import InvokeAILogger
 
 logger = InvokeAILogger.get_logger()
 
+
 class WrappedGGUFReader:
     """Wrapper around GGUFReader that adds a close() method."""
 
@@ -25,7 +25,7 @@ def __exit__(self, exc_type, exc_val, exc_tb):
 
     def close(self):
         """Explicitly close the memory-mapped file."""
-        if hasattr(self.reader, 'data') and hasattr(self.reader.data, '_mmap'):
+        if hasattr(self.reader, "data") and hasattr(self.reader.data, "_mmap"):
             try:
                 self.reader.data._mmap.close()
             except (AttributeError, OSError, ValueError) as e:
@@ -43,6 +43,9 @@ def gguf_sd_loader(path: Path, compute_dtype: torch.dtype) -> dict[str, GGMLTens
             if tensor.tensor_type in TORCH_COMPATIBLE_QTYPES:
                 torch_tensor = torch_tensor.view(*shape)
             sd[tensor.name] = GGMLTensor(
-                torch_tensor, ggml_quantization_type=tensor.tensor_type, tensor_shape=shape, compute_dtype=compute_dtype
+                torch_tensor,
+                ggml_quantization_type=tensor.tensor_type,
+                tensor_shape=shape,
+                compute_dtype=compute_dtype,
             )
         return sd

From 4f31141cbed05f42d45a42c51eadb28656ca2cdd Mon Sep 17 00:00:00 2001
From: gogurtenjoyer <36354352+gogurtenjoyer@users.noreply.github.com>
Date: Tue, 4 Nov 2025 12:41:11 -0500
Subject: [PATCH 5/7] ruff

---
 invokeai/backend/quantization/gguf/loaders.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/invokeai/backend/quantization/gguf/loaders.py b/invokeai/backend/quantization/gguf/loaders.py
index 7f3435e8399..6fdd32d4ee9 100644
--- a/invokeai/backend/quantization/gguf/loaders.py
+++ b/invokeai/backend/quantization/gguf/loaders.py
@@ -3,6 +3,7 @@
 
 import gguf
 import torch
+
 from invokeai.backend.quantization.gguf.ggml_tensor import GGMLTensor
 from invokeai.backend.quantization.gguf.utils import TORCH_COMPATIBLE_QTYPES
 from invokeai.backend.util.logging import InvokeAILogger

From 53b8902cbf69ce2060b459bd02c47f0295d10601 Mon Sep 17 00:00:00 2001
From: gogurtenjoyer <36354352+gogurtenjoyer@users.noreply.github.com>
Date: Sat, 8 Nov 2025 17:15:01 -0500
Subject: [PATCH 6/7] loaders.py fix for linux

No longer attempting to delete internal object.
---
 invokeai/backend/quantization/gguf/loaders.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/invokeai/backend/quantization/gguf/loaders.py b/invokeai/backend/quantization/gguf/loaders.py
index 6fdd32d4ee9..5e8c7d96636 100644
--- a/invokeai/backend/quantization/gguf/loaders.py
+++ b/invokeai/backend/quantization/gguf/loaders.py
@@ -28,7 +28,8 @@ def close(self):
         """Explicitly close the memory-mapped file."""
         if hasattr(self.reader, "data") and hasattr(self.reader.data, "_mmap"):
             try:
-                self.reader.data._mmap.close()
+                self.reader.data.flush()
+                del self.reader.data
             except (AttributeError, OSError, ValueError) as e:
                 logger.warning(f"Wasn't able to close GGUF memory map: {e}")
         del self.reader

From 46b0205dd5c1d445b01379547c76bea23aeb4a7e Mon Sep 17 00:00:00 2001
From: gogurtenjoyer <36354352+gogurtenjoyer@users.noreply.github.com>
Date: Sat, 8 Nov 2025 17:39:55 -0500
Subject: [PATCH 7/7] loaders.py - one more _mmap ref removed

---
 invokeai/backend/quantization/gguf/loaders.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/invokeai/backend/quantization/gguf/loaders.py b/invokeai/backend/quantization/gguf/loaders.py
index 5e8c7d96636..de210d3da8d 100644
--- a/invokeai/backend/quantization/gguf/loaders.py
+++ b/invokeai/backend/quantization/gguf/loaders.py
@@ -26,7 +26,7 @@ def __exit__(self, exc_type, exc_val, exc_tb):
 
     def close(self):
         """Explicitly close the memory-mapped file."""
-        if hasattr(self.reader, "data") and hasattr(self.reader.data, "_mmap"):
+        if hasattr(self.reader, "data"):
             try:
                 self.reader.data.flush()
                 del self.reader.data
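Note on patches 6 and 7: rather than closing the private _mmap
attribute, close() now flushes and drops reader.data (a numpy memmap),
which unmaps the file on Linux as well as Windows. The same release
pattern in isolation, using a hypothetical scratch file:

    import numpy as np

    # A small file-backed memmap standing in for GGUFReader.data.
    weights = np.memmap("scratch-weights.bin", dtype=np.uint8, mode="w+", shape=(16,))
    weights[:] = 0

    # flush() writes pending changes back to disk; deleting the last
    # reference lets numpy unmap the file, releasing the OS handle so
    # the file can be renamed or removed afterwards.
    weights.flush()
    del weights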