Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add torchvision.transforms.Resize interpolation and antialias. #1441

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions RELEASENOTES.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ __API Changes__:
#1374 Add accumulate to index_put_<br/>
`torch.optim.lr_scheduler.PolynomialLR` `power` type has been corrected, is now double.<br/>
Returning an input tensor has been corrected, is now `alias()`.<br/>
Added `interpolation` and `antialias` parameters to `torchvision.transforms.Resize`.<br/>

# NuGet Version 0.105.0

Expand Down
5 changes: 4 additions & 1 deletion src/Native/LibTorchSharp/THSNN.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,8 @@ void ApplyInterpolateMode(T& opts, const int8_t mode)
opts = opts.mode(torch::kTrilinear);
if (mode == 5)
opts = opts.mode(torch::kArea);
if (mode == 6)
opts = opts.mode(torch::kNearestExact);
}

template<typename T>
Expand Down Expand Up @@ -176,13 +178,14 @@ Tensor THSNN_affine_grid(const Tensor theta, const int64_t* size, const int size
}


EXPORT_API(Tensor) THSNN_interpolate(const Tensor input, const int64_t* size, const int size_len, const double* scale_factor, const int scale_factor_len, const int8_t mode, const int8_t align_corners, const bool recompute_scale_factor, NNAnyModule* outAsAnyModule)
EXPORT_API(Tensor) THSNN_interpolate(const Tensor input, const int64_t* size, const int size_len, const double* scale_factor, const int scale_factor_len, const int8_t mode, const int8_t align_corners, const bool recompute_scale_factor, const bool antialias, NNAnyModule* outAsAnyModule)
{
auto opts = torch::nn::functional::InterpolateFuncOptions().recompute_scale_factor(recompute_scale_factor);
// align_corners -- 0=None, 1=true, 2=false
if (align_corners != 0)
opts.align_corners(align_corners == 1);
ApplyInterpolateMode(opts, mode);
opts.antialias(antialias);

if (size_len > 0) {
std::vector<int64_t> sizes;
Expand Down
2 changes: 1 addition & 1 deletion src/Native/LibTorchSharp/THSNN.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ EXPORT_API(Tensor) THSNN_pixel_unshuffle(const Tensor tensor, const int64_t do
// Vision -- Functions

EXPORT_API(Tensor) THSNN_pad(const Tensor input, const int64_t* pad, const int pad_length, const int8_t mode, const double value);
EXPORT_API(Tensor) THSNN_interpolate(const Tensor input, const int64_t* size, const int size_len, const double* scale_factor, const int scale_factor_len, const int8_t mode, const int8_t align_corners, const bool recompute_scale_factor, NNAnyModule* outAsAnyModule);
EXPORT_API(Tensor) THSNN_interpolate(const Tensor input, const int64_t* size, const int size_len, const double* scale_factor, const int scale_factor_len, const int8_t mode, const int8_t align_corners, const bool recompute_scale_factor, const bool antialias, NNAnyModule* outAsAnyModule);
EXPORT_API(Tensor) THSNN_grid_sample(const Tensor input, const Tensor grid, const int8_t mode, const int8_t padding_mode, const int8_t align_corners);
EXPORT_API(Tensor) THSNN_affine_grid(const Tensor theta, const int64_t* size, const int size_len, const bool align_corners);

Expand Down
14 changes: 10 additions & 4 deletions src/TorchSharp/NN/Vision.cs
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,8 @@ public enum InterpolationMode
Bilinear = 2,
Bicubic = 3,
Trilinear = 4,
Area = 5
Area = 5,
NearestExact = 6
}

public enum GridSampleMode
Expand Down Expand Up @@ -194,7 +195,7 @@ public static Tensor affine_grid(Tensor theta, long[]? size = null, bool align_c
/// <param name="x">The input tensor</param>
/// <param name="size">Output spatial size</param>
/// <param name="scale_factor">Multiplier for spatial size. Has to match input size if it is a tuple.</param>
/// <param name="mode">The algorithm used for upsampling: 'nearest' | 'linear' | 'bilinear' | 'bicubic' | 'trilinear' | 'area'</param>
/// <param name="mode">The algorithm used for upsampling: 'nearest' | 'linear' | 'bilinear' | 'bicubic' | 'trilinear' | 'area' | 'nearest-exact'</param>
/// <param name="align_corners">Geometrically, we consider the pixels of the input and output as squares rather than points.
/// If set to true, the input and output tensors are aligned by the center points of their corner pixels, preserving the values at the corner pixels.
/// If set to false, the input and output tensors are aligned by the corner points of their corner pixels, and the interpolation uses edge value padding for out-of-boundary values, making this operation independent of input size when scale_factor is kept the same.</param>
Expand All @@ -205,14 +206,19 @@ public static Tensor affine_grid(Tensor theta, long[]? size = null, bool align_c
/// Otherwise, a new scale_factor will be computed based on the output and input sizes for use in the interpolation computation
/// (i.e. the computation will be identical to if the computed output_size were passed-in explicitly).
/// </param>
/// <param name="antialias">
/// Flag to apply anti-aliasing. When anti-aliasing is used
/// together with align_corners = false, the interpolation result matches Pillow's
/// result for downsampling operations. Supported modes: 'bilinear', 'bicubic'.
/// </param>
/// <returns></returns>
public static Tensor interpolate(Tensor x, long[]? size = null, double[]? scale_factor = null, InterpolationMode mode = InterpolationMode.Nearest, bool? align_corners = null, bool recompute_scale_factor = false)
public static Tensor interpolate(Tensor x, long[]? size = null, double[]? scale_factor = null, InterpolationMode mode = InterpolationMode.Nearest, bool? align_corners = null, bool recompute_scale_factor = false, bool antialias = false)
{
unsafe {
fixed (long* psize = size) {
fixed (double* pSF = scale_factor) {
byte ac = (byte)((align_corners.HasValue) ? (align_corners.Value ? 1 : 2) : 0);
var res = THSNN_interpolate(x.Handle, (IntPtr)psize, size is null ? 0 : size.Length, (IntPtr)pSF, scale_factor is null ? 0 : scale_factor.Length, (byte)mode, ac, recompute_scale_factor);
var res = THSNN_interpolate(x.Handle, (IntPtr)psize, size is null ? 0 : size.Length, (IntPtr)pSF, scale_factor is null ? 0 : scale_factor.Length, (byte)mode, ac, recompute_scale_factor, antialias);
if (res == IntPtr.Zero) { torch.CheckForErrors(); }
return new Tensor(res);
}
Expand Down
2 changes: 1 addition & 1 deletion src/TorchSharp/PInvoke/LibTorchSharp.THSNN.cs
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ internal static extern IntPtr THSNN_custom_module(

[DllImport("LibTorchSharp")]
// align_corners -- 0=None, 1=true, 2=false
internal static extern IntPtr THSNN_interpolate(IntPtr input, IntPtr size, int size_len, IntPtr scale_factor, int scale_factor_len, byte mode, byte align_corners, [MarshalAs(UnmanagedType.U1)] bool recompute_scale_factor);
internal static extern IntPtr THSNN_interpolate(IntPtr input, IntPtr size, int size_len, IntPtr scale_factor, int scale_factor_len, byte mode, byte align_corners, [MarshalAs(UnmanagedType.U1)] bool recompute_scale_factor, [MarshalAs(UnmanagedType.U1)] bool antialias);

[DllImport("LibTorchSharp")]
// align_corners -- 0=None, 1=true, 2=false
Expand Down
53 changes: 48 additions & 5 deletions src/TorchVision/Functional.cs
Original file line number Diff line number Diff line change
Expand Up @@ -694,13 +694,23 @@ public static Tensor posterize(Tensor input, int bits)
/// <param name="input">An image tensor.</param>
/// <param name="height">The height of the resized image. Must be > 0.</param>
/// <param name="width">The width of the resized image. Must be > 0.</param>
/// <param name="interpolation">
/// Desired interpolation enum defined by TorchSharp.torch.InterpolationMode.
/// The default is InterpolationMode.Nearest, not InterpolationMode.Bilinear (for historical reasons, this is incompatible with Python's torchvision v0.17 or later).
/// Only InterpolationMode.Nearest, InterpolationMode.NearestExact, InterpolationMode.Bilinear and InterpolationMode.Bicubic are supported.
/// </param>
/// <param name="maxSize">The maximum allowed for the longer edge of the resized image.</param>
/// <param name="antialias">
/// Whether to apply antialiasing.
/// It only affects bilinear or bicubic modes and it is ignored otherwise.
/// Possible values are:
/// * true: will apply antialiasing for bilinear or bicubic modes. Other modes aren't affected. This is probably what you want to use.
/// * false (default; for historical reasons, incompatible with Python's torchvision v0.17 or later): will not apply antialiasing in any mode.
/// antialias value will be automatically set to false silently in case interpolation is not InterpolationMode.Bilinear or InterpolationMode.Bicubic.
/// </param>
/// <returns></returns>
public static Tensor resize(Tensor input, int height, int width, int? maxSize = null)
public static Tensor resize(Tensor input, int height, int width, InterpolationMode interpolation = InterpolationMode.Nearest, int? maxSize = null, bool antialias = false)
{
// For now, we don't allow any other modes.
const InterpolationMode interpolation = InterpolationMode.Nearest;

var hoffset = input.Dimensions - 2;
var iHeight = input.shape[hoffset];
var iWidth = input.shape[hoffset + 1];
Expand All @@ -727,13 +737,46 @@ public static Tensor resize(Tensor input, int height, int width, int? maxSize =
}
}

// See https://github.com/pytorch/vision/blob/v0.21.0/torchvision/transforms/_functional_tensor.py#L455
// "We manually set it to False to avoid an error downstream in interpolate()
// This behaviour is documented: the parameter is irrelevant for modes
// that are not bilinear or bicubic. We used to raise an error here, but
// now we don't ..."
if (antialias && interpolation != InterpolationMode.Bilinear && interpolation != InterpolationMode.Bicubic)
antialias = false;

using var img0 = SqueezeIn(input, new ScalarType[] { ScalarType.Float32, ScalarType.Float64 }, out var needCast, out var needSqueeze, out var dtype);

using var img1 = torch.nn.functional.interpolate(img0, new long[] { h, w }, mode: interpolation, align_corners: null);
using var img1 = torch.nn.functional.interpolate(img0, new long[] { h, w }, mode: interpolation, align_corners: null, antialias: antialias);

return SqueezeOut(img1, needCast, needSqueeze, dtype);
}

/// <summary>
/// Resize the input image to the given size.
/// This overload always uses InterpolationMode.Nearest with antialiasing disabled;
/// it forwards to the overload that takes interpolation and antialias arguments.
/// </summary>
/// <param name="input">An image tensor.</param>
/// <param name="height">The height of the resized image. Must be > 0.</param>
/// <param name="width">The width of the resized image. Must be > 0.</param>
/// <param name="maxSize">The maximum allowed for the longer edge of the resized image.</param>
/// <returns>The resized image tensor.</returns>
public static Tensor resize(Tensor input, int height, int width, int? maxSize = null)
{
// Pin interpolation and antialias explicitly so this overload's behavior does not depend on the defaults of the full overload.
return resize(input, height, width, InterpolationMode.Nearest, maxSize, false);
}

/// <summary>
/// Resize the input image to the given size.
/// This overload uses InterpolationMode.Nearest, no maximum size and no antialiasing;
/// it forwards to the overload that takes interpolation, maxSize and antialias arguments.
/// </summary>
/// <param name="input">An image tensor.</param>
/// <param name="height">The height of the resized image. Must be > 0.</param>
/// <param name="width">The width of the resized image. Must be > 0.</param>
/// <returns>The resized image tensor.</returns>
public static Tensor resize(Tensor input, int height, int width)
{
// maxSize is passed as null, i.e. no limit on the longer edge is requested.
return resize(input, height, width, InterpolationMode.Nearest, null, false);
}

/// <summary>
/// Crop the given image and resize it to desired size.
/// </summary>
Expand Down
81 changes: 76 additions & 5 deletions src/TorchVision/Resize.cs
Original file line number Diff line number Diff line change
Expand Up @@ -8,24 +8,86 @@ public static partial class torchvision
{
internal class Resize : ITransform
{
internal Resize(int height, int width, int? maxSize)
internal Resize(int height, int width, InterpolationMode interpolation, int? maxSize, bool antialias)
{
this.height = height;
this.width = width;
this.interpolation = interpolation;
this.maxSize = maxSize;
this.antialias = antialias;
}

public Tensor call(Tensor input)
{
return transforms.functional.resize(input, height, width, maxSize);
return transforms.functional.resize(input, height, width, interpolation, maxSize, antialias);
}

private int height, width;
private InterpolationMode interpolation;
private int? maxSize;
private bool antialias;
}

public static partial class transforms
{
/// <summary>
/// Resize the input image to the given size.
/// </summary>
/// <param name="height">Desired output height</param>
/// <param name="width">Desired output width</param>
/// <param name="interpolation">
/// Desired interpolation enum defined by TorchSharp.torch.InterpolationMode.
/// The default is InterpolationMode.Nearest, not InterpolationMode.Bilinear (for historical reasons, this is incompatible with Python's torchvision v0.17 or later).
/// Only InterpolationMode.Nearest, InterpolationMode.NearestExact, InterpolationMode.Bilinear and InterpolationMode.Bicubic are supported.
/// </param>
/// <param name="maxSize">The maximum allowed for the longer edge of the resized image.</param>
/// <param name="antialias">
/// Whether to apply antialiasing.
/// It only affects bilinear or bicubic modes and it is ignored otherwise.
/// Possible values are:
/// * true: will apply antialiasing for bilinear or bicubic modes. Other modes aren't affected. This is probably what you want to use.
/// * false (default; for historical reasons, incompatible with Python's torchvision v0.17 or later): will not apply antialiasing in any mode.
/// </param>
/// <returns>A transform that resizes its input tensor with the given settings.</returns>
static public ITransform Resize(int height, int width, InterpolationMode interpolation = InterpolationMode.Nearest, int? maxSize = null, bool antialias = false)
{
return new Resize(height, width, interpolation, maxSize, antialias);
}

/// <summary>
/// Resize the input image to the given size.
/// </summary>
/// <param name="size">Desired output size</param>
/// <param name="interpolation">
/// Desired interpolation enum defined by TorchSharp.torch.InterpolationMode.
/// The default is InterpolationMode.Nearest, not InterpolationMode.Bilinear (for historical reasons, this is incompatible with Python's torchvision v0.17 or later).
/// Only InterpolationMode.Nearest, InterpolationMode.NearestExact, InterpolationMode.Bilinear and InterpolationMode.Bicubic are supported.
/// </param>
/// <param name="maxSize">The maximum allowed for the longer edge of the resized image.</param>
/// <param name="antialias">
/// Whether to apply antialiasing.
/// It only affects bilinear or bicubic modes and it is ignored otherwise.
/// Possible values are:
/// * true: will apply antialiasing for bilinear or bicubic modes. Other modes aren't affected. This is probably what you want to use.
/// * false (default; for historical reasons, incompatible with Python's torchvision v0.17 or later): will not apply antialiasing in any mode.
/// </param>
/// <returns>A transform that resizes its input tensor with the given settings.</returns>
static public ITransform Resize(int size, InterpolationMode interpolation = InterpolationMode.Nearest, int? maxSize = null, bool antialias = false)
{
// size is passed as the height and -1 as the width.
// NOTE(review): how functional.resize interprets a negative width is not visible here;
// presumably the width is derived from the input's aspect ratio. Confirm.
return new Resize(size, -1, interpolation, maxSize, antialias);
}

/// <summary>
/// Resize the input image to the given size.
/// This overload always uses InterpolationMode.Nearest with antialiasing disabled.
/// </summary>
/// <param name="height">Desired output height</param>
/// <param name="width">Desired output width</param>
/// <param name="maxSize">The maximum allowed for the longer edge of the resized image.</param>
/// <returns>A transform that resizes its input tensor with the given settings.</returns>
static public ITransform Resize(int height, int width, int? maxSize = null)
{
return new Resize(height, width, InterpolationMode.Nearest, maxSize, false);
}

/// <summary>
/// Resize the input image to the given size.
/// </summary>
Expand All @@ -34,17 +96,26 @@ public static partial class transforms
/// <returns></returns>
static public ITransform Resize(int height, int width)
{
return new Resize(height, width, null);
return new Resize(height, width, InterpolationMode.Nearest, null, false);
}

/// <summary>
/// Resize the input image to the given size.
/// </summary>
/// <param name="size">Desired output size</param>
/// <param name="maxSize">Max size</param>
/// <param name="maxSize">The maximum allowed for the longer edge of the resized image.</param>
static public ITransform Resize(int size, int? maxSize = null)
{
return new Resize(size, -1, maxSize);
return new Resize(size, -1, InterpolationMode.Nearest, maxSize, false);
}

/// <summary>
/// Resize the input image to the given size.
/// This overload uses InterpolationMode.Nearest, no maximum size and no antialiasing.
/// </summary>
/// <param name="size">Desired output size</param>
/// <returns>A transform that resizes its input tensor with the given settings.</returns>
static public ITransform Resize(int size)
{
// size is passed as the height and -1 as the width.
// NOTE(review): how functional.resize interprets a negative width is not visible here;
// presumably the width is derived from the input's aspect ratio. Confirm.
return new Resize(size, -1, InterpolationMode.Nearest, null, false);
}
}
}
Expand Down
Loading