diff --git a/Libraries/IntegrationTestHelpers/IntegrationTestHelpers.swift b/Libraries/IntegrationTestHelpers/IntegrationTestHelpers.swift
index 0f424175f..7b6b54239 100644
--- a/Libraries/IntegrationTestHelpers/IntegrationTestHelpers.swift
+++ b/Libraries/IntegrationTestHelpers/IntegrationTestHelpers.swift
@@ -2,7 +2,6 @@
 // Integration packages inject their own Downloader and TokenizerLoader, then call
 // these functions which run the test and throw on failure.
 
-import CoreImage
 import Foundation
 import MLX
 import MLXEmbedders
@@ -10,6 +9,10 @@ import MLXLLM
 import MLXLMCommon
 import MLXVLM
 
+#if canImport(CoreImage)
+    import CoreImage
+#endif
+
 // Both MLXLMCommon and MLXEmbedders define ModelContainer.
 public typealias LLModelContainer = MLXLMCommon.ModelContainer
 public typealias EmbeddingModelContainer = MLXEmbedders.EmbedderModelContainer
@@ -176,18 +179,32 @@ public enum ChatSessionTests {
     }
 
+    /// Asks the model for the color of a solid red 100x100 image and checks that the
+    /// streamed reply mentions "red".
+    ///
+    /// - Parameter container: The model container used to create the `ChatSession`.
+    /// - Throws: Errors from streaming the response, or a failed check when the reply
+    ///   lacks "red" or when CoreImage cannot be imported on this platform.
     public static func visionModel(container: LLModelContainer) async throws {
-        let session = ChatSession(container, generateParameters: generateParameters)
-        let redImage = CIImage(color: .red).cropped(
-            to: CGRect(x: 0, y: 0, width: 100, height: 100))
+        #if canImport(CoreImage)
+            let session = ChatSession(container, generateParameters: generateParameters)
+            let redImage = CIImage(color: .red).cropped(
+                to: CGRect(x: 0, y: 0, width: 100, height: 100))
 
-        let result = try await streamAndCollect(
-            session.streamResponse(
-                to: "What color is this image? Reply with just the color name.",
-                image: .ciImage(redImage)), label: "Vision")
-        try check(
-            result.lowercased().contains("red"),
-            "Expected 'red' in response, got: \(result)"
-        )
+            let result = try await streamAndCollect(
+                session.streamResponse(
+                    to: "What color is this image? Reply with just the color name.",
+                    image: .ciImage(redImage)), label: "Vision")
+            try check(
+                result.lowercased().contains("red"),
+                "Expected 'red' in response, got: \(result)"
+            )
+        #else
+            // Fail through the same throwing path as the other checks so the caller's
+            // test run is not aborted by a trap.
+            try check(
+                false,
+                "Vision model test requires CoreImage, which is not available on this platform.")
+        #endif
     }
 
     public static func streamDetailsWithTools(container: LLModelContainer) async throws {
diff --git a/Libraries/MLXLMCommon/ChatSession.swift b/Libraries/MLXLMCommon/ChatSession.swift
index 648e39548..4e893d5fd 100644
--- a/Libraries/MLXLMCommon/ChatSession.swift
+++ b/Libraries/MLXLMCommon/ChatSession.swift
@@ -1,9 +1,12 @@
 // Copyright © 2025 Apple Inc.
 
-import CoreGraphics
 import Foundation
 import MLX
 
+#if canImport(CoreGraphics)
+    import CoreGraphics
+#endif
+
 /// Configuration for speculative decoding in a `ChatSession`.
 ///
 /// Speculative decoding uses a small draft model to propose candidate tokens
diff --git a/Libraries/MLXLMCommon/Linux/CoreGraphics.swift b/Libraries/MLXLMCommon/Linux/CoreGraphics.swift
new file mode 100644
index 000000000..3190c27eb
--- /dev/null
+++ b/Libraries/MLXLMCommon/Linux/CoreGraphics.swift
@@ -0,0 +1,38 @@
+// Copyright © 2026 Apple Inc.
+
+#if !canImport(CoreGraphics)
+
+    // Minimal stand-ins for the CoreGraphics types used by this module, compiled only
+    // when CoreGraphics itself cannot be imported.
+    //
+    // NOTE(review): Foundation on Linux (swift-corelibs-foundation) appears to ship its
+    // own `CGFloat` and `CGSize`; confirm that clients importing both modules do not hit
+    // ambiguous-type errors.
+
+    /// Floating-point scalar used for `CGSize` dimensions.
+    public typealias CGFloat = Double
+
+    /// A width/height pair.
+    public struct CGSize: Hashable, Sendable {
+        /// A size whose width and height are both zero.
+        public static let zero = CGSize(width: 0.0, height: 0.0)
+
+        /// The horizontal extent.
+        public var width: CGFloat
+        /// The vertical extent.
+        public var height: CGFloat
+
+        /// Creates a size from floating-point dimensions.
+        public init(width: CGFloat, height: CGFloat) {
+            self.width = width
+            self.height = height
+        }
+
+        /// Creates a size from integer dimensions.
+        public init(width: Int, height: Int) {
+            self.width = CGFloat(width)
+            self.height = CGFloat(height)
+        }
+    }
+
+#endif
diff --git a/Libraries/MLXLMCommon/Linux/CoreMedia.swift b/Libraries/MLXLMCommon/Linux/CoreMedia.swift
new file mode 100644
index 000000000..2867bc409
--- /dev/null
+++ b/Libraries/MLXLMCommon/Linux/CoreMedia.swift
@@ -0,0 +1,23 @@
+// Copyright © 2026 Apple Inc.
+
+#if !canImport(CoreMedia)
+
+    /// Minimal stand-in for CoreMedia's `CMTime`, compiled only when CoreMedia cannot be
+    /// imported. It carries just the `value` and `timescale` fields.
+    public struct CMTime: Sendable {
+        /// Counterpart of `CMTime.value` in CoreMedia.
+        public var value: Int64
+        /// Counterpart of `CMTime.timescale` in CoreMedia.
+        public var timescale: Int32
+
+        /// Creates a time from its two fields.
+        ///
+        /// Declared explicitly because the synthesized memberwise initializer of a public
+        /// struct is internal and therefore unusable outside this module.
+        public init(value: Int64, timescale: Int32) {
+            self.value = value
+            self.timescale = timescale
+        }
+    }
+
+#endif
diff --git a/Libraries/MLXLMCommon/Linux/Logger.swift b/Libraries/MLXLMCommon/Linux/Logger.swift
new file mode 100644
index 000000000..0b7036908
--- /dev/null
+++ b/Libraries/MLXLMCommon/Linux/Logger.swift
@@ -0,0 +1,44 @@
+// Copyright © 2026 Apple Inc.
+
+#if canImport(os)
+
+    import os
+
+    /// Module-wide alias so other files can name `Logger` without importing `os`.
+    typealias Logger = os.Logger
+
+#else
+
+    import Foundation
+
+    /// Fallback logger for platforms without `os`, offering `info` and `error` only.
+    ///
+    /// Messages go to standard error so diagnostics never mix into a program's
+    /// standard output.
+    final class Logger: Sendable {
+        private let subsystem: String
+        private let category: String
+
+        init(subsystem: String, category: String) {
+            self.subsystem = subsystem
+            self.category = category
+        }
+
+        /// Logs `message` at the informational level.
+        func info(_ message: String) {
+            emit(level: "INFO", message)
+        }
+
+        /// Logs `message` at the error level.
+        func error(_ message: String) {
+            emit(level: "ERROR", message)
+        }
+
+        /// Writes one `[LEVEL] [subsystem.category] message` line to standard error.
+        private func emit(level: String, _ message: String) {
+            let line = "[\(level)] [\(subsystem).\(category)] \(message)\n"
+            FileHandle.standardError.write(Data(line.utf8))
+        }
+    }
+
+#endif
diff --git a/Libraries/MLXLMCommon/Linux/String+Linux.swift b/Libraries/MLXLMCommon/Linux/String+Linux.swift
new file mode 100644
index 000000000..9f0569e88
--- /dev/null
+++ b/Libraries/MLXLMCommon/Linux/String+Linux.swift
@@ -0,0 +1,16 @@
+// Copyright © 2026 Apple Inc.
+
+import Foundation
+
+#if !canImport(Darwin)
+
+    extension String {
+        /// Stand-in for `String(localized:)` on platforms other than Apple's.
+        ///
+        /// No localization lookup is performed; `resource` is returned unchanged.
+        public init(localized resource: String) {
+            self = resource
+        }
+    }
+
+#endif
diff --git a/Libraries/MLXLMCommon/ParoQuant/ParoQuantLoader.swift b/Libraries/MLXLMCommon/ParoQuant/ParoQuantLoader.swift
index ebf9d60b9..7e7b301b5 100644
--- a/Libraries/MLXLMCommon/ParoQuant/ParoQuantLoader.swift
+++ b/Libraries/MLXLMCommon/ParoQuant/ParoQuantLoader.swift
@@ -1,7 +1,6 @@
 import Foundation
 import MLX
 import MLXNN
-import os
 
 private let logger = Logger(subsystem: "mlx-swift-lm", category: "paroquant")
 
diff --git a/Libraries/MLXLMCommon/UserInput.swift b/Libraries/MLXLMCommon/UserInput.swift
index 09fc7d226..5f7371bbf 100644
--- a/Libraries/MLXLMCommon/UserInput.swift
+++ b/Libraries/MLXLMCommon/UserInput.swift
@@ -1,10 +1,15 @@
 // Copyright © 2024 Apple Inc.
 
-@preconcurrency import AVFoundation
-import CoreImage
 import Foundation
 import MLX
 
+#if canImport(AVFoundation)
+    @preconcurrency import AVFoundation
+#endif
+#if canImport(CoreImage)
+    import CoreImage
+#endif
+
 public typealias Message = [String: any Sendable]
 
 /// Container for raw user input.
@@ -40,101 +45,140 @@ public struct UserInput {
     }
 
+    /// One frame of video content together with its timestamp.
     public struct VideoFrame {
-        public let frame: CIImage
+        /// The frame's content, in any representation `Image` supports.
+        public let image: Image
+        /// The time this frame is associated with.
         public let timeStamp: CMTime
 
-        public init(frame: CIImage, timeStamp: CMTime) {
-            self.frame = frame
+        /// Creates a frame from an image and its timestamp.
+        public init(image: Image, timeStamp: CMTime) {
+            self.image = image
             self.timeStamp = timeStamp
         }
+
+        #if canImport(CoreImage)
+
+            /// Creates a frame from a `CIImage`, stored as `.ciImage(frame)`.
+            @available(
+                *, deprecated,
+                message: "Use init(image:timeStamp:) instead"
+            )
+            public init(frame: CIImage, timeStamp: CMTime) {
+                self.image = .ciImage(frame)
+                self.timeStamp = timeStamp
+            }
+
+            /// The frame as a `CIImage`.
+            ///
+            /// Traps if `image.asCIImage()` throws, since this property cannot propagate
+            /// the error; call `image.asCIImage()` directly to handle failures.
+            @available(
+                *, deprecated,
+                message: "Use image.asCIImage()"
+            )
+            public var frame: CIImage {
+                return try! image.asCIImage()
+            }
+
+        #endif
     }
 
     /// Representation of a video resource.
     public enum Video {
-        case avAsset(AVAsset)
+        #if canImport(AVFoundation)
+            case avAsset(AVAsset)
+        #endif
         case url(URL)
         /// Useful for decoded frames held in memory
         case frames([VideoFrame])
 
-        @available(
-            *, deprecated,
-            message: "Use MediaProcessing.asProcessedSequence() with the Video directly"
-        )
-        public func asAVAsset() -> AVAsset {
-            switch self {
-            case .avAsset(let asset):
-                return asset
-            case .url(let url):
-                return AVAsset(url: url)
-            case .frames:
-                fatalError(
-                    "calling asAVAsset() on Video Input with VideoFames provided is unsupported and deprecated - please use MediaProcessing.asProcessedSequence() instead"
-                )
+        #if canImport(AVFoundation)
+            @available(
+                *, deprecated,
+                message: "Use MediaProcessing.asProcessedSequence() with the Video directly"
+            )
+            public func asAVAsset() -> AVAsset {
+                switch self {
+                case .avAsset(let asset):
+                    return asset
+                case .url(let url):
+                    return AVAsset(url: url)
+                case .frames:
+                    fatalError(
+                        "calling asAVAsset() on Video Input with VideoFames provided is unsupported and deprecated - please use MediaProcessing.asProcessedSequence() instead"
+                    )
+                }
             }
-        }
+        #endif
     }
 
     /// Representation of an image resource.
     public enum Image {
-        case ciImage(CIImage)
+        #if canImport(CoreImage)
+            case ciImage(CIImage)
+        #endif
         case url(URL)
         case array(MLXArray)
 
-        public func asCIImage() throws -> CIImage {
-            switch self {
-            case .ciImage(let image):
-                return image
-
-            case .url(let url):
-                if let image = CIImage(contentsOf: url) {
+        #if canImport(CoreImage)
+            public func asCIImage() throws -> CIImage {
+                switch self {
+                case .ciImage(let image):
                     return image
-                }
-                throw UserInputError.unableToLoad(url)
-
-            case .array(let array):
-                guard array.ndim == 3 else {
-                    throw UserInputError.arrayError("array must have 3 dimensions: \(array.ndim)")
-                }
-
-                var array = array
-
-                // convert to 0 .. 255
-                if array.max().item(Float.self) <= 1.0 {
-                    array = array * 255
-                }
 
-                // planar -> pixels
-                switch array.dim(0) {
-                case 3, 4:
-                    // channels first (planar)
-                    array = array.transposed(1, 2, 0)
-                default:
-                    break
+                case .url(let url):
+                    if let image = CIImage(contentsOf: url) {
+                        return image
+                    }
+                    throw UserInputError.unableToLoad(url)
+
+                case .array(let array):
+                    guard array.ndim == 3 else {
+                        throw UserInputError.arrayError(
+                            "array must have 3 dimensions: \(array.ndim)")
+                    }
+
+                    var array = array
+
+                    // convert to 0 .. 255
+                    if array.max().item(Float.self) <= 1.0 {
+                        array = array * 255
+                    }
+
+                    // planar -> pixels
+                    switch array.dim(0) {
+                    case 3, 4:
+                        // channels first (planar)
+                        array = array.transposed(1, 2, 0)
+                    default:
+                        break
+                    }
+
+                    // 4 components per pixel
+                    switch array.dim(-1) {
+                    case 3:
+                        // pad to 4 bytes per pixel
+                        array = padded(array, widths: [0, 0, [0, 1]], value: MLXArray(255))
+                    case 4:
+                        // good
+                        break
+                    default:
+                        throw UserInputError.arrayError(
+                            "channel dimension must be last and 3/4: \(array.shape)")
+                    }
+
+                    let arrayData = array.asData()
+                    let (H, W, _) = array.shape3
+                    let cs = CGColorSpace(name: CGColorSpace.sRGB)!
+
+                    return CIImage(
+                        bitmapData: arrayData.data, bytesPerRow: W * 4,
+                        size: .init(width: W, height: H),
+                        format: .RGBA8, colorSpace: cs)
                 }
-
-                // 4 components per pixel
-                switch array.dim(-1) {
-                case 3:
-                    // pad to 4 bytes per pixel
-                    array = padded(array, widths: [0, 0, [0, 1]], value: MLXArray(255))
-                case 4:
-                    // good
-                    break
-                default:
-                    throw UserInputError.arrayError(
-                        "channel dimension must be last and 3/4: \(array.shape)")
-                }
-
-                let arrayData = array.asData()
-                let (H, W, _) = array.shape3
-                let cs = CGColorSpace(name: CGColorSpace.sRGB)!
-
-                return CIImage(
-                    bitmapData: arrayData.data, bytesPerRow: W * 4,
-                    size: .init(width: W, height: H),
-                    format: .RGBA8, colorSpace: cs)
             }
-        }
+        #endif
     }
 
     /// Representation of processing to apply to media.
diff --git a/Libraries/MLXVLM/MediaProcessing.swift b/Libraries/MLXVLM/MediaProcessing.swift
index 1d75072b1..0b91f11cc 100644
--- a/Libraries/MLXVLM/MediaProcessing.swift
+++ b/Libraries/MLXVLM/MediaProcessing.swift
@@ -443,8 +443,8 @@ public enum MediaProcessing {
             case .success(requestedTime: _, let image, actualTime: let actual):
                 let ciImage = CIImage(
                     cgImage: image, options: [.colorSpace: CGColorSpace(name: CGColorSpace.sRGB)!])
-                let frame = try frameProcessing(.init(frame: ciImage, timeStamp: actual))
-                ciImages.append(frame.frame)
+                let frame = try frameProcessing(.init(image: .ciImage(ciImage), timeStamp: actual))
+                ciImages.append(try frame.image.asCIImage())
                 timestamps.append(frame.timeStamp)
             case .failure(requestedTime: _, _):
                 break
@@ -511,8 +511,8 @@ public enum MediaProcessing {
             if let targetIndex {
                 let videoFrame = videoFrames[targetIndex]
                 let frame = try frameProcessing(
-                    .init(frame: videoFrame.frame, timeStamp: videoFrame.timeStamp))
-                ciImages.append(frame.frame)
+                    .init(image: videoFrame.image, timeStamp: videoFrame.timeStamp))
+                ciImages.append(try frame.image.asCIImage())
                 timestamps.append(frame.timeStamp)
             }
         }
diff --git a/Libraries/MLXVLM/Models/Qwen25VL.swift b/Libraries/MLXVLM/Models/Qwen25VL.swift
index abd4912d9..ebea2aaca 100644
--- a/Libraries/MLXVLM/Models/Qwen25VL.swift
+++ b/Libraries/MLXVLM/Models/Qwen25VL.swift
@@ -760,7 +760,7 @@ public struct Qwen25VLProcessor: UserInputProcessor {
                 ) { frame in
                     // first apply the user requested resizing, etc. if any
                     let resizedImage = MediaProcessing.apply(
-                        frame.frame, processing: input.processing)
+                        try frame.image.asCIImage(), processing: input.processing)
                     if resizedSize == .zero {
                         let size = resizedImage.extent.size
                         let (resizedHeight, resizedWidth) = try QwenVL.targetSize(
@@ -770,7 +770,7 @@ public struct Qwen25VLProcessor: UserInputProcessor {
                         resizedSize = CGSize(width: resizedWidth, height: resizedHeight)
                     }
                     let processedImage = preprocess(image: resizedImage, resizedSize: resizedSize)
-                    return VideoFrame(frame: processedImage, timeStamp: frame.timeStamp)
+                    return VideoFrame(image: .ciImage(processedImage), timeStamp: frame.timeStamp)
                 }
 
                 videosAsImageSequences.append(imageSequence.frames)
diff --git a/Libraries/MLXVLM/Models/Qwen2VL.swift b/Libraries/MLXVLM/Models/Qwen2VL.swift
index 701f755f1..05b051c0e 100644
--- a/Libraries/MLXVLM/Models/Qwen2VL.swift
+++ b/Libraries/MLXVLM/Models/Qwen2VL.swift
@@ -599,7 +599,7 @@ public struct Qwen2VLProcessor: UserInputProcessor {
                 ) { frame in
                     // first apply the user requested resizing, etc. if any
                     let resizedImage = MediaProcessing.apply(
-                        frame.frame, processing: input.processing)
+                        try frame.image.asCIImage(), processing: input.processing)
                     if resizedSize == .zero {
                         let size = resizedImage.extent.size
                         let (resizedHeight, resizedWidth) = try QwenVL.targetSize(
@@ -609,7 +609,7 @@ public struct Qwen2VLProcessor: UserInputProcessor {
                         resizedSize = CGSize(width: resizedWidth, height: resizedHeight)
                     }
                     let processedImage = preprocess(image: resizedImage, resizedSize: resizedSize)
-                    return VideoFrame(frame: processedImage, timeStamp: frame.timeStamp)
+                    return VideoFrame(image: .ciImage(processedImage), timeStamp: frame.timeStamp)
                 }
 
                 videosAsImageSequences.append(imageSequence.frames)
diff --git a/Libraries/MLXVLM/Models/Qwen3VL.swift b/Libraries/MLXVLM/Models/Qwen3VL.swift
index 523553a16..775f9553b 100644
--- a/Libraries/MLXVLM/Models/Qwen3VL.swift
+++ b/Libraries/MLXVLM/Models/Qwen3VL.swift
@@ -112,7 +112,8 @@ public struct Qwen3VLProcessor: UserInputProcessor {
                 let sequence = try await MediaProcessing.asProcessedSequence(
                     video, targetFPS: { _ in Double(2) }
                 ) { frame in
-                    let processed = MediaProcessing.apply(frame.frame, processing: input.processing)
+                    let processed = MediaProcessing.apply(
+                        try frame.image.asCIImage(), processing: input.processing)
                     if resizedSize == .zero {
                         let size = processed.extent.size
                         let (height, width) = try QwenVL.targetSize(
@@ -124,7 +125,7 @@ public struct Qwen3VLProcessor: UserInputProcessor {
                         resizedSize = CGSize(width: width, height: height)
                     }
                     let finalImage = preprocess(image: processed, resizedSize: resizedSize)
-                    return VideoFrame(frame: finalImage, timeStamp: frame.timeStamp)
+                    return VideoFrame(image: .ciImage(finalImage), timeStamp: frame.timeStamp)
                 }
                 accumulatedFrames.append(sequence.frames)
             }
diff --git a/Libraries/MLXVLM/Models/SmolVLM2.swift b/Libraries/MLXVLM/Models/SmolVLM2.swift
index c10772054..5591aa76c 100644
--- a/Libraries/MLXVLM/Models/SmolVLM2.swift
+++ b/Libraries/MLXVLM/Models/SmolVLM2.swift
@@ -320,14 +320,15 @@ public struct SmolVLMProcessor: UserInputProcessor {
                 }
             ) { frame in
 
-                let processedFrame = frame.frame
+                let processedFrame = try frame.image
+                    .asCIImage()
                     .toSRGB()
                     .resampled(
                         to: CGSize(width: fixedImageSize, height: fixedImageSize),
                         method: CIImage.ResamplingMethod.lanczos
                     )
                     .normalized(mean: config.imageMeanTuple, std: config.imageStdTuple)
-                return VideoFrame(frame: processedFrame, timeStamp: frame.timeStamp)
+                return VideoFrame(image: .ciImage(processedFrame), timeStamp: frame.timeStamp)
             }
 
             let thwFrames = (0 ..< processedFrames.frames.count).map {
diff --git a/Tests/MLXLMTests/MediaProcessingTests.swift b/Tests/MLXLMTests/MediaProcessingTests.swift
index 9c6b7e7a6..efe16c2d7 100644
--- a/Tests/MLXLMTests/MediaProcessingTests.swift
+++ b/Tests/MLXLMTests/MediaProcessingTests.swift
@@ -72,9 +72,10 @@ public class MediaProcesingTests: XCTestCase {
         // We know video is exactly 5 seconds long, expect 10 samples
         let frames = try await MediaProcessing.asProcessedSequence(video, samplesPerSecond: 2) {
             frame in
-            let image = preprocess(image: frame.frame, resizedSize: .init(width: 224, height: 224))
+            let image = preprocess(
+                image: try frame.image.asCIImage(), resizedSize: .init(width: 224, height: 224))
 
-            return VideoFrame.init(frame: image, timeStamp: frame.timeStamp)
+            return VideoFrame.init(image: .ciImage(image), timeStamp: frame.timeStamp)
         }
 
         XCTAssert(frames.frames.count == 10)
@@ -101,7 +102,7 @@ public class MediaProcesingTests: XCTestCase {
         for i in 0 ..< (seconds * framerate) {
             let image = imageWithColor(colors.randomElement()!)
             let timeStamp: CMTime = .init(value: Int64(i), timescale: Int32(framerate))
-            rawFrames.append(VideoFrame(frame: image, timeStamp: timeStamp))
+            rawFrames.append(VideoFrame(image: .ciImage(image), timeStamp: timeStamp))
         }
 
         // Bogus preprocessing values
@@ -117,9 +118,10 @@ public class MediaProcesingTests: XCTestCase {
         // We know video is exactly 5 seconds long, expect 10 samples
         let frames = try await MediaProcessing.asProcessedSequence(video, samplesPerSecond: 2) {
             frame in
-            let image = preprocess(image: frame.frame, resizedSize: .init(width: 224, height: 224))
+            let image = preprocess(
+                image: try frame.image.asCIImage(), resizedSize: .init(width: 224, height: 224))
 
-            return VideoFrame.init(frame: image, timeStamp: frame.timeStamp)
+            return VideoFrame.init(image: .ciImage(image), timeStamp: frame.timeStamp)
         }
 
         XCTAssert(frames.frames.count == 10)