Add Absolute Position Encoder (#2)

jacobbieker · flowirtz · JackKelly · web-flow · commit 2de25e20aec7 · 2021-10-20T14:13:12.000+01:00
#minor 

* Move position_encoding from nowcasting_dataset PR

* Start on encoding modalities checks

* Add getting needed information for relative encoding

* Add absolute encoding, more steps for relative encoding

* Make stub for subselecting position encoding

* Add first bit of subselecting position encoding tensor

* Remove relative encoding, split into different branch

* Go through the batch of datetimes

* Simplify code

* Add datetime feature creation test

* Add test for normalizing geospatial coordinates

* Add test for absolute position encoding

* Run black on tests

* Add test for encode position

* Add not implemented test

* Add test for encoding modalities

* Add test for multi-modality encoding

* Add PV to multi-modality test

* Remove stub tests

* Add requirement

* Bump version

* Reduce repeats of time features

Instead of matching the number of channels in the spatial dimension, just have it be single channel, as its a single value per timestep

* Add check for identical features in different modalities

* Make check more robust

* Add spatial checks as well

* Add docstring on kwargs

* Remove relative and both options for encoding

* Update nowcasting_dataloader/utils/position_encoding.py

Co-authored-by: Flo &lt;6052785+flowirtz@users.noreply.github.com&gt;

* Update tests/test_position_encoding.py

Co-authored-by: Flo &lt;6052785+flowirtz@users.noreply.github.com&gt;

* Add skipping tests

* Update nowcasting_dataloader/utils/position_encoding.py

Co-authored-by: Jack Kelly &lt;jack@OpenClimateFix.org&gt;

* Update nowcasting_dataloader/utils/position_encoding.py

Co-authored-by: Jack Kelly &lt;jack@OpenClimateFix.org&gt;

* Update nowcasting_dataloader/utils/position_encoding.py

Co-authored-by: Jack Kelly &lt;jack@OpenClimateFix.org&gt;

* Update nowcasting_dataloader/utils/position_encoding.py

Co-authored-by: Jack Kelly &lt;jack@OpenClimateFix.org&gt;

* Address some PR comments

* Update nowcasting_dataloader/utils/position_encoding.py

Co-authored-by: Jack Kelly &lt;jack@OpenClimateFix.org&gt;

* Address more PR comments

* Remove encode_position

* Switch to using constants

* Remove method

* Fix import

* Fixes

* Fix test

* Change docstring

Co-authored-by: Flo &lt;6052785+flowirtz@users.noreply.github.com&gt;
Co-authored-by: Jack Kelly &lt;jack@OpenClimateFix.org&gt;
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -1,7 +1,6 @@
 default_language_version:
   python: python3.9
 
-files: ^nowcasting_dataloader/
 repos:
   - repo: https://github.com/pre-commit/pre-commit-hooks
     rev: v3.4.0
@@ -26,17 +25,3 @@ repos:
     hooks:
       - id: prettier
         types: [yaml]
-
-  # Google docstring
-  - repo: https://github.com/PyCQA/pydocstyle
-    rev: 6.1.1
-    hooks:
-      - id: pydocstyle
-        args:
-          [
-            --convention,
-            "google",
-            --add-ignore,
-            "D200,D210,D212,D415",
-            "nowcasting_dataloader",
-          ]
diff --git a/nowcasting_dataloader/data_sources/pv/pv_model.py b/nowcasting_dataloader/data_sources/pv/pv_model.py
@@ -13,7 +13,7 @@
     PV_SYSTEM_ROW_NUMBER,
     PV_SYSTEM_ID,
 )
-from nowcasting_dataset.data_sources.datasource_output import (
+from nowcasting_dataloader.data_sources.datasource_output import (
     DataSourceOutputML,
 )
 from nowcasting_dataset.time import make_random_time_vectors
diff --git a/nowcasting_dataloader/data_sources/satellite/satellite_model.py b/nowcasting_dataloader/data_sources/satellite/satellite_model.py
@@ -8,7 +8,7 @@
 from pydantic import Field
 
 from nowcasting_dataset.consts import Array
-from nowcasting_dataset.data_sources.datasource_output import (
+from nowcasting_dataloader.data_sources.datasource_output import (
     DataSourceOutputML,
 )
 from nowcasting_dataset.time import make_random_time_vectors
diff --git a/nowcasting_dataloader/data_sources/sun/sun_model.py b/nowcasting_dataloader/data_sources/sun/sun_model.py
@@ -5,7 +5,7 @@
 from pydantic import Field, validator
 
 from nowcasting_dataset.consts import Array, SUN_AZIMUTH_ANGLE, SUN_ELEVATION_ANGLE
-from nowcasting_dataset.data_sources.datasource_output import (
+from nowcasting_dataloader.data_sources.datasource_output import (
     DataSourceOutputML,
 )
 from nowcasting_dataset.time import make_random_time_vectors
diff --git a/nowcasting_dataloader/utils/__init__.py b/nowcasting_dataloader/utils/__init__.py
@@ -0,0 +1 @@
+"""Various utilities for loading and transforming data"""
diff --git a/nowcasting_dataloader/utils/position_encoding.py b/nowcasting_dataloader/utils/position_encoding.py
@@ -0,0 +1,222 @@
+"""
+This file contains various ways of performing positional encoding.
+
+These encodings can be:
+- Absolute positioning (i.e. this pixel is at this latitude/longitude, and is at 16:00)
+
+These encodings can also be performed with:
+- Fourier Features, based off what is done in PerceiverIO
+"""
+import numpy as np
+import torch
+import einops
+from math import pi
+from typing import Union, Optional, Dict, List, Tuple, Any
+import datetime
+
+TIME_DIM = 2
+HEIGHT_DIM = 3
+WIDTH_DIM = 4
+
+
+def encode_modalities(
+    modalities_to_encode: Dict[str, torch.Tensor],
+    datetimes: Dict[str, List[datetime.datetime]],
+    geospatial_coordinates: Dict[str, Tuple[np.ndarray, np.ndarray]],
+    geospatial_bounds: Dict[str, float],
+    **kwargs,
+) -> dict[str, torch.Tensor]:
+    """
+    Create a consistent position encoding and encode the positions of the different modalities in time and space
+
+    This position encoding is added as new keys to the dictionary containing the modalities to encode. This is done
+    instead of appending the position encoding in case the position encoding needs to be used for the query to the
+    Perceiver IO model
+
+    This code assumes that there is at least 2 timesteps of at least one modality to be encoded
+
+    Args:
+        modalities_to_encode: Dict of input modalities, i.e. NWP, Satellite, PV, GSP, etc as torch.Tensors in [B, C, T, H, W] ordering
+        datetimes: Dict of datetimes for each modality, giving the actual date for each timestep in the modality
+        geospatial_coordinates: Dict of x, y coordinates for each modality with pixels, used to determine smallest spatial step needed, in OSGB coordinates
+        geospatial_bounds: Max extant of the area where examples could be drawn from, used for normalizing coordinates within an area of interest
+            in the format of a dictionary with the keys {'x_min', 'x_max', 'y_min', 'y_max'}
+        kwargs: Passed to fourier_encode
+
+    Returns:
+        Input modality dictionary where for every 'key' in modalities_to_encode, a new key called 'key+'_position_encoding' will be added
+        containing the absolute position encoding of the examples
+    """
+    position_encodings = {}
+    for key in modalities_to_encode.keys():
+        position_encodings[key + "_position_encoding"] = encode_absolute_position(
+            shape=modalities_to_encode[key].shape,
+            geospatial_coordinates=geospatial_coordinates[key],
+            datetimes=datetimes[key],
+            geospatial_bounds=geospatial_bounds,
+            **kwargs,
+        )
+    # Update original dictionary
+    modalities_to_encode.update(position_encodings)
+    return modalities_to_encode
+
+
+def encode_absolute_position(
+    shape: List[int],
+    geospatial_coordinates: List[np.ndarray],
+    geospatial_bounds: Dict[str, float],
+    datetimes: List[datetime.datetime],
+    **kwargs,
+) -> torch.Tensor:
+    """
+    Encodes the absolute position of the pixels/voxels in time and space
+
+    This should be done per-modality and can be thought of as the relative position of the input modalities across a
+    given year and the area of the Earth covered by all the examples.
+
+    Args:
+        shape: Shape to encode positions for
+        geospatial_coordinates: The geospatial coordinates, in OSGB format
+        datetimes: Time of day and date as a list of datetimes, one for each timestep
+        geospatial_bounds: The geospatial bounds of the area where the examples come from, e.g. the coordinates of the area covered by the SEVIRI RSS image
+        **kwargs:
+
+    Returns:
+        The absolute position encoding for the given shape
+    """
+    datetime_features = create_datetime_features(datetimes)
+
+    # Fourier Features of absolute position
+    encoded_geo_position = normalize_geospatial_coordinates(
+        geospatial_coordinates, geospatial_bounds, **kwargs
+    )
+
+    # Combine time and space features
+    to_concat = [einops.repeat(encoded_geo_position, "b h w c -> b c t h w", t=shape[TIME_DIM])]
+    for date_feature in datetime_features:
+        to_concat.append(
+            einops.repeat(
+                date_feature, "b t -> b c t h w", h=shape[HEIGHT_DIM], w=shape[WIDTH_DIM], c=1
+            )
+        )
+
+    # Now combined into one large encoding
+    absolute_position_encoding = torch.cat(to_concat, dim=1)
+
+    return absolute_position_encoding
+
+
+def normalize_geospatial_coordinates(
+    geospatial_coordinates: List[np.ndarray], geospatial_bounds: Dict[str, float], **kwargs
+) -> torch.Tensor:
+    """
+    Normalize the geospatial coordinates by the max extant to keep everything between -1 and 1, in sin and cos
+
+    This normalization should be against a set geospatial area, so that the same place has the same spatial encoding
+    every time.
+
+    Args:
+        geospatial_coordinates: The coordinates for the pixels in the image
+        geospatial_bounds: The maximum extant
+
+    Returns:
+        The normalized geospatial coordinates, rescaled to between -1 and 1 for the whole extant of the training area
+
+    """
+    # Normalize the X first
+    geospatial_coordinates[0] = (geospatial_coordinates[0] - geospatial_bounds["x_min"]) / (
+        geospatial_bounds["x_max"] - geospatial_bounds["x_min"]
+    )
+    # Normalize the Y second
+    geospatial_coordinates[1] = (geospatial_coordinates[1] - geospatial_bounds["y_min"]) / (
+        geospatial_bounds["y_max"] - geospatial_bounds["y_min"]
+    )
+
+    # Now those are between 0 and 1, want between -1 and 1
+    geospatial_coordinates[0] = geospatial_coordinates[0] * 2 - 1
+    geospatial_coordinates[1] = geospatial_coordinates[1] * 2 - 1
+    # Now create a grid of the coordinates
+    # Have to do it for each individual example in the batch, and zip together x and y for it
+    to_concat = []
+    for idx in range(len(geospatial_coordinates[0])):
+        x = geospatial_coordinates[0][idx]
+        y = geospatial_coordinates[1][idx]
+        grid = torch.meshgrid(x, y)
+        pos = torch.stack(grid, dim=-1)
+        encoded_position = fourier_encode(pos, **kwargs)
+        encoded_position = einops.rearrange(encoded_position, "... n d -> ... (n d)")
+        to_concat.append(encoded_position)
+
+    encoded_position = torch.stack(to_concat, dim=0)
+    return encoded_position
+
+
+def create_datetime_features(
+    datetimes: List[List[datetime.datetime]],
+) -> List[torch.Tensor]:
+    """
+    Converts a list of datetimes to day of year, hour of day sin and cos representation
+
+    Args:
+        datetimes: List of list of datetimes for the examples in a batch
+
+    Returns:
+        Tuple of torch Tensors containing the hour of day sin,cos, and day of year sin,cos
+    """
+    hour_of_day = []
+    day_of_year = []
+    for batch_idx in range(len(datetimes)):
+        hours = []
+        days = []
+        for index in datetimes[batch_idx]:
+            hours.append((index.hour + (index.minute / 60) / 24))
+            days.append((index.timetuple().tm_yday / 365))
+        hour_of_day.append(hours)
+        day_of_year.append(days)
+
+    outputs = []
+    for index in [hour_of_day, day_of_year]:
+        index = torch.as_tensor(index)
+        radians = index * 2 * np.pi
+        index_sin = torch.sin(radians)
+        index_cos = torch.cos(radians)
+        outputs.append(index_sin)
+        outputs.append(index_cos)
+
+    return outputs
+
+
+def fourier_encode(
+    x: torch.Tensor,
+    max_freq: float,
+    num_bands: int = 4,
+    sine_only: bool = False,
+) -> torch.Tensor:
+    """
+    Create Fourier Encoding
+
+    Args:
+        x: Input Torch Tensor
+        max_freq: Maximum frequency for the Fourier features
+        num_bands: Number of frequency bands
+        sine_only: Whether to only use sine or both sine and cosine features
+
+    Returns:
+        Torch Tensor with the fourier position encoded concatenated
+    """
+    x = x.unsqueeze(-1)
+    device, dtype, orig_x = x.device, x.dtype, x
+
+    scales = torch.linspace(
+        1.0,
+        max_freq / 2,
+        num_bands,
+        device=device,
+        dtype=dtype,
+    )
+    scales = scales[(*((None,) * (len(x.shape) - 1)), Ellipsis)]
+
+    x = x * scales * pi
+    x = x.sin() if sine_only else torch.cat([x.sin(), x.cos()], dim=-1)
+    x = torch.cat((x, orig_x), dim=-1)
+    return x
diff --git a/requirements.txt b/requirements.txt
@@ -1,3 +1,4 @@
 nowcasting_dataset
 torch
 pytorch-lightning
+einops
diff --git a/tests/test_position_encoding.py b/tests/test_position_encoding.py

Original file line number	Diff line number	Diff line change
`@@ -13,7 +13,7 @@`
`13`	`13`	`PV_SYSTEM_ROW_NUMBER,`
`14`	`14`	`PV_SYSTEM_ID,`
`15`	`15`	`)`
`16`		`-from nowcasting_dataset.data_sources.datasource_output import (`
	`16`	`+from nowcasting_dataloader.data_sources.datasource_output import (`
`17`	`17`	`DataSourceOutputML,`
`18`	`18`	`)`
`19`	`19`	`from nowcasting_dataset.time import make_random_time_vectors`
Original file line number	Diff line number	Diff line change
`@@ -8,7 +8,7 @@`
`8`	`8`	`from pydantic import Field`
`9`	`9`
`10`	`10`	`from nowcasting_dataset.consts import Array`
`11`		`-from nowcasting_dataset.data_sources.datasource_output import (`
	`11`	`+from nowcasting_dataloader.data_sources.datasource_output import (`
`12`	`12`	`DataSourceOutputML,`
`13`	`13`	`)`
`14`	`14`	`from nowcasting_dataset.time import make_random_time_vectors`
Original file line number	Diff line number	Diff line change
`@@ -5,7 +5,7 @@`
`5`	`5`	`from pydantic import Field, validator`
`6`	`6`
`7`	`7`	`from nowcasting_dataset.consts import Array, SUN_AZIMUTH_ANGLE, SUN_ELEVATION_ANGLE`
`8`		`-from nowcasting_dataset.data_sources.datasource_output import (`
	`8`	`+from nowcasting_dataloader.data_sources.datasource_output import (`
`9`	`9`	`DataSourceOutputML,`
`10`	`10`	`)`
`11`	`11`	`from nowcasting_dataset.time import make_random_time_vectors`
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	`+"""Various utilities for loading and transforming data"""`
-Original file line number
+Diff line change
@@ @@ -1,3 +1,4 @@ @@
 nowcasting_dataset
 torch
 pytorch-lightning
 +einops