Skip to content

Commit

Permalink
Merge pull request #796 from jwlodek/add-jpeg-mimetype
Browse files Browse the repository at this point in the history
Add jpeg adapter, support for image/jpeg mimetype
  • Loading branch information
genematx authored Oct 30, 2024
2 parents 915b629 + 9af3553 commit d1ca0bf
Show file tree
Hide file tree
Showing 9 changed files with 584 additions and 182 deletions.
5 changes: 5 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,11 @@ Write the date in place of the "Unreleased" in the case a new version is release

# Changelog

## Unreleased

- Add adapters for reading back assets with the image/jpeg and
multipart/related;type=image/jpeg mimetypes.

## v0.1.0b10 (2024-10-11)

- Add kwarg to client logout to auto-clear default identity.
Expand Down
10 changes: 5 additions & 5 deletions tiled/_tests/test_directory_walker.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,10 @@
from ..client import Context, from_context
from ..client.register import (
Settings,
group_tiff_sequences,
group_image_sequences,
identity,
register,
register_tiff_sequence,
register_image_sequence,
skip_all,
strip_suffixes,
)
Expand Down Expand Up @@ -155,14 +155,14 @@ async def test_skip_all_in_combination(tmpdir):
# With skip_all, directories and tiff sequence are registered, but individual files are not
with Context.from_app(build_app(catalog)) as context:
client = from_context(context)
await register(client, tmpdir, walkers=[group_tiff_sequences, skip_all])
await register(client, tmpdir, walkers=[group_image_sequences, skip_all])
assert list(client) == ["one"]
assert "image" in client["one"]


@pytest.mark.asyncio
async def test_tiff_seq_custom_sorting(tmpdir):
"Register TIFFs that are not in alphanumeric order."
"Register images that are not in alphanumeric order."
N = 10
ordering = list(range(N))
random.Random(0).shuffle(ordering)
Expand All @@ -177,7 +177,7 @@ async def test_tiff_seq_custom_sorting(tmpdir):
catalog = in_memory(writable_storage=tmpdir)
with Context.from_app(build_app(catalog)) as context:
client = from_context(context)
await register_tiff_sequence(
await register_image_sequence(
client,
"image",
files,
Expand Down
189 changes: 189 additions & 0 deletions tiled/_tests/test_jpeg.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,189 @@
from pathlib import Path

import numpy
import pytest
from PIL import Image

from ..adapters.jpeg import JPEGAdapter, JPEGSequenceAdapter
from ..adapters.mapping import MapAdapter
from ..catalog import in_memory
from ..client import Context, from_context
from ..client.register import IMG_SEQUENCE_EMPTY_NAME_ROOT, register
from ..server.app import build_app
from ..utils import ensure_uri

# Shape of the random array that the ``client`` fixture saves as color.jpeg;
# test_rgb expects the same shape when the image is read back.
COLOR_SHAPE = (11, 17, 3)


@pytest.fixture(scope="module")
def client(tmpdir_module):
    """Yield a client serving one RGB JPEG and a three-file grayscale JPEG sequence."""
    root = Path(tmpdir_module)
    frames_dir = root / "sequence"
    frames_dir.mkdir()

    frame_paths = [frames_dir / f"temp{index:05}.jpeg" for index in range(3)]
    for frame_path in frame_paths:
        # JPEGs can only be 8 bit ints
        pixels = numpy.random.randint(0, 255, (5, 7), dtype="uint8")
        Image.fromarray(pixels).convert("L").save(frame_path)

    rgb_pixels = numpy.random.randint(0, 255, COLOR_SHAPE, dtype="uint8")
    color_path = root / "color.jpeg"
    Image.fromarray(rgb_pixels).convert("RGB").save(color_path)

    adapters = {
        "color": JPEGAdapter(ensure_uri(color_path)),
        "sequence": JPEGSequenceAdapter.from_uris(
            [ensure_uri(frame_path) for frame_path in frame_paths]
        ),
    }
    with Context.from_app(build_app(MapAdapter(adapters))) as context:
        yield from_context(context)


@pytest.mark.parametrize(
    "slice_input, correct_shape",
    [
        (None, (3, 5, 7)),
        (0, (5, 7)),
        (slice(0, 3, 2), (2, 5, 7)),
        ((1, slice(0, 3), slice(0, 3)), (3, 3)),
        ((slice(0, 3), slice(0, 3), slice(0, 3)), (3, 3, 3)),
        ((..., 0, 0), (3,)),
        ((0, slice(0, 1), slice(0, 2), ...), (1, 2)),
        ((0, ..., slice(0, 2)), (5, 2)),
        ((..., slice(0, 1)), (3, 5, 1)),
    ],
)
def test_jpeg_sequence(client, slice_input, correct_shape):
    "Reading the sequence with a slice returns an array of the expected shape."
    sequence_node = client["sequence"]
    result = sequence_node.read(slice=slice_input)
    assert result.shape == correct_shape


@pytest.mark.parametrize("block_input, correct_shape", [((0, 0, 0), (1, 5, 7))])
def test_jpeg_sequence_block(client, block_input, correct_shape):
    "Reading one block of the sequence returns an array of the expected shape."
    sequence_node = client["sequence"]
    block_data = sequence_node.read_block(block_input)
    assert block_data.shape == correct_shape


@pytest.mark.asyncio
async def test_jpeg_sequence_order(tmpdir):
    """
    Frames of a registered sequence come back in file-number order.

    directory/
      image00000.jpeg
      image00001.jpeg
      ...
      image00009.jpeg
    """
    num_files = 10
    base = numpy.ones((4, 5))
    # Frame i is a constant image in which every pixel equals i.
    expected_frames = [base * i for i in range(num_files)]
    for i, frame in enumerate(expected_frames):
        Image.fromarray(frame).convert("L").save(Path(tmpdir / f"image{i:05}.jpeg"))

    catalog = in_memory(readable_storage=[tmpdir])
    with Context.from_app(build_app(catalog)) as context:
        client = from_context(context)
        await register(client, tmpdir)
        for i, frame in enumerate(expected_frames):
            numpy.testing.assert_equal(client["image"][i], frame)


@pytest.mark.asyncio
async def test_jpeg_sequence_with_directory_walker(tmpdir):
    """
    Register a directory mixing JPEG sequences, a lone JPEG, and CSV files.

    directory/
      00000.jpeg
      00001.jpeg
      ...
      00009.jpeg
      single_image.jpeg
      image00000.jpeg
      image00001.jpeg
      ...
      image00009.jpeg
      other_image00000.jpeg
      other_image00001.jpeg
      ...
      other_image00009.jpeg
      other_image2_00000.jpeg
      other_image2_00001.jpeg
      ...
      other_image2_00009.jpeg
      other_file1.csv
      other_file2.csv
      stuff.csv
    """
    pixels = numpy.random.randint(0, 255, (3, 5), dtype="uint8")
    # The empty prefix produces the digit-only file names (00000.jpeg, ...).
    sequence_prefixes = ("image", "other_image", "", "other_image2_")
    for index in range(10):
        for prefix in sequence_prefixes:
            jpeg_path = Path(tmpdir / f"{prefix}{index:05}.jpeg")
            Image.fromarray(pixels).convert("L").save(jpeg_path)
    Image.fromarray(pixels).save(Path(tmpdir / "single_image.jpeg"))

    csv_text = """
a,b,c
1,2,3
"""
    for csv_name in ["stuff.csv", "other_file1.csv", "other_file2.csv"]:
        with open(Path(tmpdir / csv_name), "w") as csv_file:
            csv_file.write(csv_text)

    catalog = in_memory(readable_storage=[tmpdir])
    with Context.from_app(build_app(catalog)) as context:
        client = from_context(context)
        await register(client, tmpdir)

        # The lone JPEG is registered as its own node.
        assert client["single_image"].shape == (3, 5)

        # Each numbered series is grouped into one stacked node.
        sequence_keys = (
            IMG_SEQUENCE_EMPTY_NAME_ROOT,
            "image",
            "other_image",
            "other_image2_",
        )
        for sequence_key in sequence_keys:
            assert client[sequence_key].shape == (10, 3, 5)

        # Exactly one key is not derived from a file-name stem: the node
        # that groups the digit-only files.
        named_keys = {
            "single_image",
            "image",
            "other_image",
            "other_image2_",
            "other_file1",
            "other_file2",
            "stuff",
        }
        unnamed_keys = [key for key in client.keys() if key not in named_keys]
        assert len(unnamed_keys) == 1
        assert client[unnamed_keys[0]].shape == (10, 3, 5)

        # The CSV files are registered as individual nodes.
        for csv_key in ("stuff", "other_file1", "other_file2"):
            assert client[csv_key].columns == ["a", "b", "c"]


def test_rgb(client):
    "Reading the color JPEG returns an array shaped like COLOR_SHAPE."
    color_node = client["color"]
    assert color_node.read().shape == COLOR_SHAPE


def test_jpeg_sequence_cache(client):
    "Indexing and read() agree for one frame and differ from another frame."
    # Both access styles are expected to reach the same server-side method
    # (read_block) on the same object.
    first_by_index = client["sequence"][0]
    first_by_read = client["sequence"].read(0)

    # A different index, to confirm that an earlier cached result does not
    # leak into a later request.
    second_by_read = client["sequence"].read(1)

    numpy.testing.assert_equal(first_by_index, first_by_read)
    with numpy.testing.assert_raises(AssertionError):
        numpy.testing.assert_equal(first_by_read, second_by_read)
4 changes: 2 additions & 2 deletions tiled/_tests/test_tiff.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
from ..adapters.tiff import TiffAdapter, TiffSequenceAdapter
from ..catalog import in_memory
from ..client import Context, from_context
from ..client.register import TIFF_SEQUENCE_EMPTY_NAME_ROOT, register
from ..client.register import IMG_SEQUENCE_EMPTY_NAME_ROOT, register
from ..server.app import build_app
from ..utils import ensure_uri

Expand Down Expand Up @@ -137,7 +137,7 @@ async def test_tiff_sequence_with_directory_walker(tmpdir):
# Single image is its own node.
assert client["single_image"].shape == (3, 5)
# Each sequence is grouped into a node.
assert client[TIFF_SEQUENCE_EMPTY_NAME_ROOT].shape == (10, 3, 5)
assert client[IMG_SEQUENCE_EMPTY_NAME_ROOT].shape == (10, 3, 5)
assert client["image"].shape == (10, 3, 5)
assert client["other_image"].shape == (10, 3, 5)
assert client["other_image2_"].shape == (10, 3, 5)
Expand Down
133 changes: 133 additions & 0 deletions tiled/adapters/jpeg.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
import builtins
from typing import Any, List, Optional, Tuple, Union

import numpy as np
from numpy._typing import NDArray
from PIL import Image

from ..structures.array import ArrayStructure, BuiltinDtype
from ..structures.core import Spec, StructureFamily
from ..utils import path_from_uri
from .protocols import AccessPolicy
from .resource_cache import with_resource_cache
from .sequence import FileSequenceAdapter
from .type_alliases import JSON, NDSlice


class JPEGAdapter:
    """
    Read a JPEG file as an array.

    Examples
    --------

    >>> JPEGAdapter("file://localhost/path/to/file.jpeg")
    """

    structure_family = StructureFamily.array

    def __init__(
        self,
        data_uri: str,
        *,
        structure: Optional[ArrayStructure] = None,
        metadata: Optional[JSON] = None,
        specs: Optional[List[Spec]] = None,
        access_policy: Optional[AccessPolicy] = None,
    ) -> None:
        """
        Open the image and, if needed, work out its array structure.

        Parameters
        ----------
        data_uri :
            URI of the JPEG file. It is converted to a filesystem path with
            ``path_from_uri``.
        structure :
            Array structure to report. When omitted, the image is converted
            to an array once and the structure is derived from it, described
            as a single chunk spanning each dimension.
        metadata :
            Metadata to report from ``metadata()``; defaults to an empty dict.
        specs :
            Stored on the adapter as ``specs``; defaults to an empty list.
        access_policy :
            Stored on the adapter as ``access_policy``.

        Raises
        ------
        TypeError
            If ``data_uri`` is not a ``str``.
        """
        if not isinstance(data_uri, str):
            raise TypeError(
                f"data_uri must be a str, not {type(data_uri).__name__}"
            )
        filepath = path_from_uri(data_uri)
        # The opener and the path together form the cache key.
        # NOTE(review): presumably this lets adapters for the same file share
        # one opened image -- confirm against resource_cache.
        cache_key = (Image.open, filepath)
        self._file = with_resource_cache(cache_key, Image.open, filepath)
        self.specs = specs or []
        self._provided_metadata = metadata or {}
        self.access_policy = access_policy
        if structure is None:
            arr = np.asarray(self._file)
            structure = ArrayStructure(
                shape=arr.shape,
                # One chunk per dimension, covering its full extent.
                chunks=tuple((dim,) for dim in arr.shape),
                data_type=BuiltinDtype.from_numpy_dtype(arr.dtype),
            )
        self._structure = structure

    def metadata(self) -> JSON:
        """
        Return a shallow copy of the metadata given at construction.

        Returns
        -------
        A new dict, so that callers cannot mutate the adapter's own copy.
        """
        return self._provided_metadata.copy()

    def read(self, slice: Optional[NDSlice] = None) -> NDArray[Any]:
        """
        Read the image, optionally restricted to a slice.

        Parameters
        ----------
        slice :
            Index applied to the image array; ``None`` returns all of it.

        Returns
        -------
        The image data as a numpy array.
        """
        arr = np.asarray(self._file)
        if slice is not None:
            arr = arr[slice]
        return arr

    def read_block(
        self, block: Tuple[int, ...], slice: Optional[builtins.slice] = None
    ) -> NDArray[Any]:
        """
        Read one block of the image, optionally restricted to a slice.

        Parameters
        ----------
        block :
            Block index. Only the all-zero index is accepted.
        slice :
            Index applied within the block; ``None`` returns all of it.

        Returns
        -------
        The image data as a numpy array.

        Raises
        ------
        IndexError
            If any element of ``block`` is non-zero.
        """
        # Check each index rather than their sum, so that mixed-sign tuples
        # such as (1, -1) are rejected as well.
        if any(block):
            raise IndexError(block)
        return self.read(slice)

    def structure(self) -> ArrayStructure:
        """
        Return the array structure of the image.

        Returns
        -------
        The structure given at construction, or the one derived from the file.
        """
        return self._structure


class JPEGSequenceAdapter(FileSequenceAdapter):
    """Read a sequence of JPEG files as one array with a leading file axis."""

    def _load_from_files(
        self, slice: Union[builtins.slice, int] = slice(None)
    ) -> NDArray[Any]:
        """
        Decode the selected file(s) from ``self.filepaths``.

        Parameters
        ----------
        slice :
            An integer selects a single file; a slice selects several.
            The default selects every file.

        Returns
        -------
        An array whose first axis indexes files: length 1 for an integer,
        otherwise one entry per selected file.
        """

        def decode(filepath: Any) -> NDArray[Any]:
            # The context manager releases the file handle even when
            # decoding raises.
            with Image.open(filepath) as image:
                return np.asarray(image)

        if isinstance(slice, int):
            # Add the leading axis so a single file has the same rank as a
            # multi-file selection.
            return decode(self.filepaths[slice])[None, ...]
        return np.asarray([decode(filepath) for filepath in self.filepaths[slice]])
Loading

0 comments on commit d1ca0bf

Please sign in to comment.