Skip to content

Commit 9a69252

Browse files
shoumikhin and facebook-github-bot
authored and committed
Add image to multimodal runner test.
Summary: . Differential Revision: D82183713
1 parent 56d5186 commit 9a69252

File tree

2 files changed

+38
-3
lines changed

2 files changed

+38
-3
lines changed

extension/llm/apple/ExecuTorchLLM/__tests__/MultimodalRunnerTest.swift

Lines changed: 38 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,11 +9,43 @@
99
import ExecuTorchLLM
1010
import XCTest
1111

12+
extension UIImage {
  /// Converts the image into a planar RGB `Image`.
  ///
  /// The backing `CGImage` is rendered into an 8-bit-per-component RGBA bitmap,
  /// the alpha byte of every pixel is dropped, and the remaining bytes are
  /// rearranged from interleaved order (`RGBARGBA…`) into three consecutive
  /// planes (`RRR…GGG…BBB…`), each `width * height` bytes long.
  ///
  /// - Returns: An `Image` built from `width * height * 3` bytes with
  ///   `channels` set to 3, using the pixel dimensions of the `CGImage`.
  /// - Note: Traps if the image has no `CGImage` backing or if the bitmap
  ///   context cannot be created.
  func asImage() -> Image {
    guard let cgImage = self.cgImage else {
      fatalError("UIImage has no CGImage backing")
    }
    let width = cgImage.width
    let height = cgImage.height
    let pixelCount = width * height
    let bytesPerPixel = 4
    let bytesPerRow = bytesPerPixel * width
    var pixelBytes = [UInt8](repeating: 0, count: pixelCount * bytesPerPixel)

    // The bitmap context keeps the data pointer and writes through it when
    // drawing. A pointer obtained via `&pixelBytes` is only guaranteed to be
    // valid for the duration of the call it is passed to, so both the context
    // creation and the draw must happen while the buffer is pinned.
    pixelBytes.withUnsafeMutableBytes { buffer in
      guard let context = CGContext(
        data: buffer.baseAddress,
        width: width,
        height: height,
        bitsPerComponent: 8,
        bytesPerRow: bytesPerRow,
        space: CGColorSpaceCreateDeviceRGB(),
        bitmapInfo: CGImageAlphaInfo.premultipliedLast.rawValue | CGBitmapInfo.byteOrder32Big.rawValue
      ) else {
        fatalError("Failed to create a \(width)x\(height) RGBA bitmap context")
      }
      context.draw(cgImage, in: CGRect(x: 0, y: 0, width: width, height: height))
    }

    // De-interleave RGBA into planar RGB, discarding alpha.
    var rgbBytes = [UInt8](repeating: 0, count: pixelCount * 3)
    for i in 0..<pixelCount {
      let pixelOffset = i * bytesPerPixel
      rgbBytes[i] = pixelBytes[pixelOffset]
      rgbBytes[i + pixelCount] = pixelBytes[pixelOffset + 1]
      rgbBytes[i + pixelCount * 2] = pixelBytes[pixelOffset + 2]
    }
    return Image(data: Data(rgbBytes), width: width, height: height, channels: 3)
  }
}
41+
1242
/// XCTest case that calls `runner.generate` and checks the text accumulated from the token callback.
/// This is a diff view: lines following a `N-` marker are the removed version, lines following a `N+` marker are the added version.
class MultimodalRunnerTest: XCTestCase {
1343
func test() {
1444
let bundle = Bundle(for: type(of: self))
1545
guard let modelPath = bundle.path(forResource: "llava", ofType: "pte"),
16-
let tokenizerPath = bundle.path(forResource: "tokenizer", ofType: "bin") else {
46+
let tokenizerPath = bundle.path(forResource: "tokenizer", ofType: "bin"),
47+
let imagePath = bundle.path(forResource: "IMG_0005", ofType: "JPG"),
48+
let image = UIImage(contentsOfFile: imagePath) else {
1749
// NOTE(review): the added guard also fails when the IMG_0005.JPG resource is missing or
// cannot be loaded as a UIImage, but the message below only mentions the model and tokenizer.
XCTFail("Couldn't find model or tokenizer files")
1850
return
1951
}
// NOTE(review): the lines between the two hunks (including the declaration of `runner`) are not shown here.
@@ -22,12 +54,15 @@ class MultimodalRunnerTest: XCTestCase {
2254
var text = ""
2355

2456
do {
25-
try runner.generate([MultimodalInput("hello")], sequenceLength: 2) { token in
57+
try runner.generate([
58+
MultimodalInput("What's this?"),
59+
MultimodalInput(image.asImage()),
60+
], sequenceLength: 2) { token in
2661
// Each token passed to the callback is appended to `text`.
text += token
2762
}
2863
} catch {
2964
XCTFail("Failed to generate text with error \(error)")
3065
}
31-
XCTAssertEqual("hello,", text.lowercased())
66+
// The added assertion is a case-insensitive substring check rather than an exact match.
XCTAssertTrue(text.lowercased().contains("waterfall"))
3267
}
3368
}
1.77 MB
Loading

0 commit comments

Comments
 (0)