Skip to content

Commit 7b0d3e5

Browse files
committed
better support for Batch and NAGBatch serialization. The proper objects will be returned even if NAG.load() or Data.load() is called on files containing batched data
1 parent cc225ae commit 7b0d3e5

File tree

8 files changed

+189
-60
lines changed

8 files changed

+189
-60
lines changed

docs/data_structures.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ Important specificities of our `Data` object are:
4848
`j` with `i<j`
4949
- `NAG.get_sampling()` produces indices for sampling the superpoints with
5050
certain constraints
51-
- `NAG.save()` and `NAG.load()` allow optimized, memory-friedly I/O operations
51+
- `NAG.save()` and `NAG.load()` allow optimized, memory-friendly I/O operations
5252
- `NAG.select()` indexes the nodes of a specified partition level à la numpy
5353
and updates the rest of the `NAG` structure accordingly
5454
- `NAG.show()` for interactive visualization (see

src/data/cluster.py

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -136,8 +136,8 @@ def save(self, f, fp_dtype=torch.float):
136136
save_tensor(self.pointers, f, 'pointers', fp_dtype=fp_dtype)
137137
save_tensor(self.points, f, 'points', fp_dtype=fp_dtype)
138138

139-
@staticmethod
140-
def load(f, idx=None, update_sub=True, verbose=False):
139+
@classmethod
140+
def load(cls, f, idx=None, update_sub=True, verbose=False):
141141
"""Load Cluster from an HDF5 file. See `Cluster.save` for
142142
writing such file. Options allow reading only part of the
143143
clusters.
@@ -163,7 +163,7 @@ def load(f, idx=None, update_sub=True, verbose=False):
163163

164164
if not isinstance(f, (h5py.File, h5py.Group)):
165165
with h5py.File(f, 'r') as file:
166-
out = Cluster.load(
166+
out = cls.load(
167167
file, idx=idx, update_sub=update_sub, verbose=verbose)
168168
return out
169169

@@ -172,34 +172,34 @@ def load(f, idx=None, update_sub=True, verbose=False):
172172
start = time()
173173
idx = tensor_idx(idx)
174174
if verbose:
175-
print(f'Cluster.load tensor_idx : {time() - start:0.5f}s')
175+
print(f'{cls.__name__}.load tensor_idx : {time() - start:0.5f}s')
176176

177177
if idx is None or idx.shape[0] == 0:
178178
start = time()
179179
pointers = load_tensor(f['pointers'])
180180
points = load_tensor(f['points'])
181181
if verbose:
182-
print(f'Cluster.load read all : {time() - start:0.5f}s')
182+
print(f'{cls.__name__}.load read all : {time() - start:0.5f}s')
183183
start = time()
184-
out = Cluster(pointers, points), (None, None)
184+
out = cls(pointers, points), (None, None)
185185
if verbose:
186-
print(f'Cluster.load init : {time() - start:0.5f}s')
186+
print(f'{cls.__name__}.load init : {time() - start:0.5f}s')
187187
return out
188188

189189
# Read only pointers start and end indices based on idx
190190
start = time()
191191
ptr_start = load_tensor(f['pointers'], idx=idx)
192192
ptr_end = load_tensor(f['pointers'], idx=idx + 1)
193193
if verbose:
194-
print(f'Cluster.load read ptr : {time() - start:0.5f}s')
194+
print(f'{cls.__name__}.load read ptr : {time() - start:0.5f}s')
195195

196196
# Create the new pointers
197197
start = time()
198198
pointers = torch.cat([
199199
torch.zeros(1, dtype=ptr_start.dtype),
200200
torch.cumsum(ptr_end - ptr_start, 0)])
201201
if verbose:
202-
print(f'Cluster.load new pointers : {time() - start:0.5f}s')
202+
print(f'{cls.__name__}.load new pointers : {time() - start:0.5f}s')
203203

204204
# Create the indexing tensor to select and order values.
205205
# Simply, we could have used a list of slices, but we want to
@@ -212,19 +212,19 @@ def load(f, idx=None, update_sub=True, verbose=False):
212212
pointers[:-1]].repeat_interleave(sizes)
213213
val_idx += ptr_start.repeat_interleave(sizes)
214214
if verbose:
215-
print(f'Cluster.load val_idx : {time() - start:0.5f}s')
215+
print(f'{cls.__name__}.load val_idx : {time() - start:0.5f}s')
216216

217217
# Read the points, now we have computed the val_idx
218218
start = time()
219219
points = load_tensor(f['points'], idx=val_idx)
220220
if verbose:
221-
print(f'Cluster.load read points : {time() - start:0.5f}s')
221+
print(f'{cls.__name__}.load read points : {time() - start:0.5f}s')
222222

223223
# Build the Cluster object
224224
start = time()
225-
cluster = Cluster(pointers, points)
225+
cluster = cls(pointers, points)
226226
if verbose:
227-
print(f'Cluster.load init : {time() - start:0.5f}s')
227+
print(f'{cls.__name__}.load init : {time() - start:0.5f}s')
228228

229229
if not update_sub:
230230
return cluster, (None, None)
@@ -239,7 +239,7 @@ def load(f, idx=None, update_sub=True, verbose=False):
239239
idx_sub = cluster.points[perm]
240240
cluster.points = new_cluster_points
241241
if verbose:
242-
print(f'Cluster.load update_sub : {time() - start:0.5f}s')
242+
print(f'{cls.__name__}.load update_sub : {time() - start:0.5f}s')
243243

244244
# Selecting the subpoints with 'idx_sub' will not be
245245
# enough to maintain consistency with the current points. We
@@ -248,7 +248,7 @@ def load(f, idx=None, update_sub=True, verbose=False):
248248
start = time()
249249
sub_super = cluster.to_super_index()
250250
if verbose:
251-
print(f'Cluster.load super_index : {time() - start:0.5f}s')
251+
print(f'{cls.__name__}.load super_index : {time() - start:0.5f}s')
252252

253253
return cluster, (idx_sub, sub_super)
254254

src/data/csr.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -251,7 +251,7 @@ def __getitem__(self, idx):
251251

252252
else:
253253
# Select the pointers and prepare the values indexing
254-
pointers, val_idx = CSRData.index_select_pointers(
254+
pointers, val_idx = self.__class__.index_select_pointers(
255255
self.pointers, idx)
256256
out.pointers = pointers
257257
out.values = [v[val_idx] for v in self.values]
@@ -348,8 +348,8 @@ def to(self, device, **kwargs):
348348
if self.__sizes__ is not None else None
349349
return out
350350

351-
@staticmethod
352-
def from_list(csr_list):
351+
@classmethod
352+
def from_list(cls, csr_list):
353353
assert isinstance(csr_list, list) and len(csr_list) > 0
354354
assert isinstance(csr_list[0], CSRData), \
355355
"All provided items must be CSRData objects."
@@ -392,7 +392,7 @@ def from_list(csr_list):
392392
for i in range(num_values):
393393
val_list = [csr.values[i] for csr in csr_list]
394394
if isinstance(csr_list[0].values[i], CSRData):
395-
val = CSRBatch.from_list(val_list)
395+
val = cls.from_list(val_list)
396396
elif is_index_value[i]:
397397
# "Index" values are stacked with updated indices.
398398
# For Clusters, this implies all point indices are

src/data/data.py

Lines changed: 116 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -616,11 +616,13 @@ def save(
616616
else:
617617
raise NotImplementedError(f'Unsupported type={type(val)}')
618618

619-
@staticmethod
619+
@classmethod
620620
def load(
621-
f, idx=None, keys_idx=None, keys=None, update_sub=True,
621+
cls, f, idx=None, keys_idx=None, keys=None, update_sub=True,
622622
verbose=False, rgb_to_float=False):
623-
"""Read an HDF5 file and return its content as a dictionary.
623+
"""Read an HDF5 file and return its content as a Data object.
624+
625+
NB: if relevant, a Batch object will be returned.
624626
625627
:param f: h5 file path of h5py.File or h5py.Group
626628
:param idx: int, list, numpy.ndarray, torch.Tensor
@@ -644,17 +646,25 @@ def load(
644646
"""
645647
if not isinstance(f, (h5py.File, h5py.Group)):
646648
with h5py.File(f, 'r') as file:
647-
out = Data.load(
649+
out = cls.load(
648650
file, idx=idx, keys_idx=keys_idx, keys=keys,
649651
update_sub=update_sub, verbose=verbose,
650652
rgb_to_float=rgb_to_float)
651653
return out
652654

655+
# Check if the file actually corresponds to a Batch object
656+
# rather than a simple Data object
657+
if 'batch_item_0' in f.keys():
658+
return Batch.load(
659+
f, idx=idx, keys_idx=keys_idx, keys=keys,
660+
update_sub=update_sub, verbose=verbose,
661+
rgb_to_float=rgb_to_float)
662+
653663
idx = tensor_idx(idx)
654664
if idx.shape[0] == 0:
655665
keys_idx = []
656666
elif keys_idx is None:
657-
keys_idx = list(set(f.keys()) - set(Data._NOT_INDEXABLE))
667+
keys_idx = list(set(f.keys()) - set(cls._NOT_INDEXABLE))
658668
if keys is None:
659669
all_keys = list(f.keys())
660670
for k in ['_csr_', '_cluster_', '_obj_']:
@@ -685,7 +695,7 @@ def load(
685695
elif k in keys:
686696
d_dict[k] = load_tensor(f[k])
687697
if verbose and k in d_dict.keys():
688-
print(f'Data.load {k:<22}: {time() - start:0.5f}s')
698+
print(f'{cls.__name__}.load {k:<22}: {time() - start:0.5f}s')
689699

690700
# Update the 'keys_idx' with newly-found 'csr_keys',
691701
# 'cluster_keys', and 'obj_keys'
@@ -703,7 +713,7 @@ def load(
703713
elif k in keys:
704714
d_dict[k] = load_csr_to_dense(f['_csr_'][k], verbose=verbose)
705715
if verbose and k in d_dict.keys():
706-
print(f'Data.load {k:<22}: {time() - start:0.5f}s')
716+
print(f'{cls.__name__}.load {k:<22}: {time() - start:0.5f}s')
707717

708718
# Special key '_cluster_' holds Cluster data
709719
for k in cluster_keys:
@@ -717,7 +727,7 @@ def load(
717727
f['_cluster_'][k], update_sub=update_sub,
718728
verbose=verbose)[0]
719729
if verbose and k in d_dict.keys():
720-
print(f'Data.load {k:<22}: {time() - start:0.5f}s')
730+
print(f'{cls.__name__}.load {k:<22}: {time() - start:0.5f}s')
721731

722732
# Special key '_obj_' holds InstanceData data
723733
for k in obj_keys:
@@ -728,7 +738,7 @@ def load(
728738
elif k in keys:
729739
d_dict[k] = InstanceData.load(f['_obj_'][k], verbose=verbose)
730740
if verbose and k in d_dict.keys():
731-
print(f'Data.load {k:<22}: {time() - start:0.5f}s')
741+
print(f'{cls.__name__}.load {k:<22}: {time() - start:0.5f}s')
732742

733743
# In case RGB is among the keys and is in integer type, convert
734744
# to float
@@ -737,7 +747,7 @@ def load(
737747
d_dict[k] = to_float_rgb(d_dict[k]) if rgb_to_float \
738748
else to_byte_rgb(d_dict[k])
739749

740-
return Data(**d_dict)
750+
return cls(**d_dict)
741751

742752
def estimate_instance_centroid(self, mode='iou'):
743753
"""Estimate the centroid position of each target instance
@@ -959,3 +969,99 @@ def get_example(self, idx):
959969
self.obj = obj_bckp
960970

961971
return data
972+
973+
def save(
974+
self,
975+
f,
976+
y_to_csr=True,
977+
pos_dtype=torch.float,
978+
fp_dtype=torch.float):
979+
"""Save Batch to HDF5 file.
980+
981+
:param f: h5 file path of h5py.File or h5py.Group
982+
:param y_to_csr: bool
983+
Convert 'y' to CSR format before saving. Only applies if
984+
'y' is a 2D histogram
985+
:param pos_dtype: torch dtype
986+
Data type to which 'pos' should be cast before saving. The
987+
reason for this separate treatment of 'pos' is that global
988+
coordinates may be too large and casting to 'fp_dtype' may
989+
result in hurtful precision loss
990+
:param fp_dtype: torch dtype
991+
Data type to which floating point tensors should be cast
992+
before saving
993+
:return:
994+
"""
995+
# To facilitate Batch serialization, we store the Batch as a
996+
# list of Data objects rather than a single Data object
997+
data_list = self.to_data_list()
998+
999+
if not isinstance(f, (h5py.File, h5py.Group)):
1000+
with h5py.File(f, 'w') as file:
1001+
self.save(
1002+
file,
1003+
y_to_csr=y_to_csr,
1004+
pos_dtype=pos_dtype,
1005+
fp_dtype=fp_dtype)
1006+
return
1007+
1008+
assert isinstance(f, (h5py.File, h5py.Group))
1009+
1010+
# Save each individual Data object
1011+
for i, data in enumerate(data_list):
1012+
g = f.create_group(f'batch_item_{i}')
1013+
data.save(
1014+
g,
1015+
y_to_csr=y_to_csr,
1016+
pos_dtype=pos_dtype,
1017+
fp_dtype=fp_dtype)
1018+
1019+
@classmethod
1020+
def load(
1021+
cls, f, idx=None, keys_idx=None, keys=None, update_sub=True,
1022+
verbose=False, rgb_to_float=False):
1023+
"""Read an HDF5 file and return its content as a Batch object.
1024+
1025+
:param f: h5 file path of h5py.File or h5py.Group
1026+
:param idx: int, list, numpy.ndarray, torch.Tensor
1027+
Used to select the elements in `keys_idx`. Supports fancy
1028+
indexing
1029+
:param keys_idx: List(str)
1030+
Keys on which the indexing should be applied
1031+
:param keys: List(str)
1032+
Keys should be loaded from the file, ignoring the rest
1033+
:param update_sub: bool
1034+
If True, the point (i.e. subpoint) indices will also be
1035+
updated to maintain dense indices. The output will then
1036+
contain '(idx_sub, sub_super)' which can help apply these
1037+
changes to maintain consistency with lower hierarchy levels
1038+
of a NAG.
1039+
:param verbose: bool
1040+
:param rgb_to_float: bool
1041+
If True and an integer 'rgb' or 'mean_rgb' attribute is
1042+
loaded, it will be cast to float
1043+
:return:
1044+
"""
1045+
if not isinstance(f, (h5py.File, h5py.Group)):
1046+
with h5py.File(f, 'r') as file:
1047+
out = cls.load(
1048+
file, idx=idx, keys_idx=keys_idx, keys=keys,
1049+
update_sub=update_sub, verbose=verbose,
1050+
rgb_to_float=rgb_to_float)
1051+
return out
1052+
1053+
# Recover each individual Data object making up the Batch object
1054+
data_list = []
1055+
num_batch_items = len(f)
1056+
for i in range(num_batch_items):
1057+
start = time()
1058+
data = Data.load(
1059+
f[f'batch_item_{i}'], idx=idx, keys_idx=keys_idx, keys=keys,
1060+
update_sub=update_sub, verbose=verbose,
1061+
rgb_to_float=rgb_to_float)
1062+
data_list.append(data)
1063+
if verbose:
1064+
print(f'{cls.__name__}.load item-{i:<15} : 'f'{time() - start:0.3f}s\n')
1065+
1066+
# Return a Batch object
1067+
return cls.from_data_list(data_list)

0 commit comments

Comments
 (0)