Skip to content

Commit 42dab80

Browse files
authored
[UnitTest] Add test for fmax reductions without fast-math. (#266)
Add unit tests for vectorizing FMax reductions without fast-math flags, covering a wide range of inputs, including various combinations of NaNs and signed-zeros. PR: #266
1 parent cad933d commit 42dab80

File tree

3 files changed

+296
-0
lines changed

3 files changed

+296
-0
lines changed

SingleSource/UnitTests/Vectorizer/common.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,14 @@
11
#include <memory>
22
#include <random>
33

4+
// Defines two local generic lambdas, ScalarFn and VectorFn, that share the
// same body and differ only in the clang loop pragma placed in front of Loop:
//   - ScalarFn: vectorize(disable) interleave_count(1)
//   - VectorFn: vectorize(enable)
//
// Init is pasted verbatim before the pragma and Loop directly after it, so the
// pragma applies to the first loop statement in Loop. Loop must also contain
// the `return` statement. Both lambdas take (auto *A, Type TC) and return Type.
//
// NOTE(review): TC is declared with `Type` here, whereas
// DEFINE_SCALAR_AND_VECTOR_FN2 below declares it as `unsigned` -- confirm
// whether that difference is intended.
#define DEFINE_SCALAR_AND_VECTOR_FN1_TYPE(Init, Loop, Type) \
5+
auto ScalarFn = [](auto *A, Type TC) -> Type { \
6+
Init _Pragma("clang loop vectorize(disable) interleave_count(1)") Loop \
7+
}; \
8+
auto VectorFn = [](auto *A, Type TC) -> Type { \
9+
Init _Pragma("clang loop vectorize(enable)") Loop \
10+
};
11+
412
#define DEFINE_SCALAR_AND_VECTOR_FN2(Init, Loop) \
513
auto ScalarFn = [](auto *A, auto *B, unsigned TC) { \
614
Init _Pragma("clang loop vectorize(disable) interleave_count(1)") Loop \
Lines changed: 274 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,274 @@
1+
#include <algorithm>
#include <cmath>
#include <cstdlib>
#include <functional>
#include <iostream>
#include <limits>
#include <memory>
#include <stdint.h>

#include "common.h"
9+
10+
/// Returns true when \p A and \p B are the same reduction result.
///
/// Two NaNs are treated as equal (a NaN never equals a non-NaN), and zeros
/// must additionally agree in sign, so +0.0 and -0.0 are reported as
/// different. All other values use ordinary floating-point equality.
static bool isEqual(float A, float B) {
  const bool ANaN = std::isnan(A);
  const bool BNaN = std::isnan(B);
  if (ANaN || BNaN)
    return ANaN && BNaN;

  if (A != B)
    return false;

  // +0.0 == -0.0 under IEEE comparison, so zeros also need matching signs.
  return A != 0.0f || std::signbit(A) == std::signbit(B);
}
19+
20+
/// Signature of a reduction kernel: takes the input array and an element
/// count and returns the reduced value.
template <typename Ty> using Fn1Ty = std::function<Ty(Ty *, unsigned)>;

/// Runs \p ScalarFn and \p VectorFn on the same input and aborts the test if
/// their results differ.
///
/// \param ScalarFn  reference kernel.
/// \param VectorFn  kernel under test.
/// \param Src       input array passed unchanged to both kernels.
/// \param N         element count passed unchanged to both kernels.
/// \param Type      label of the input pattern, printed on mismatch.
///
/// On mismatch a "Miscompare" line is written to stderr and the process exits
/// with status 1.
template <typename Ty>
static void check(Fn1Ty<Ty> ScalarFn, Fn1Ty<Ty> VectorFn, Ty *Src, unsigned N,
                  const char *Type) {
  // Src is typed as Ty* (not float*) so it matches the kernels' own signature.
  const Ty Reference = ScalarFn(Src, N);
  const Ty ToCheck = VectorFn(Src, N);
  if (isEqual(Reference, ToCheck))
    return;

  std::cerr << "Miscompare " << Type << ": " << Reference << " != " << ToCheck
            << "\n";
  std::exit(1);
}
33+
34+
template <typename Ty>
35+
static void checkVectorFunction(Fn1Ty<Ty> ScalarFn, Fn1Ty<Ty> VectorFn,
36+
const char *Name) {
37+
std::cout << "Checking " << Name << "\n";
38+
39+
unsigned N = 1024;
40+
std::unique_ptr<Ty[]> Src1(new Ty[N]);
41+
init_data(Src1, N);
42+
43+
// Check with random inputs.
44+
45+
// Check with sorted inputs.
46+
std::sort(&Src1[0], &Src1[N]);
47+
check(ScalarFn, VectorFn, &Src1[0], N, "sorted");
48+
49+
// Check with reverse sorted inputs.
50+
std::reverse(&Src1[0], &Src1[N]);
51+
check(ScalarFn, VectorFn, &Src1[0], N, "reverse-sorted");
52+
53+
// Check with all max values.
54+
for (unsigned I = 0; I != N; ++I)
55+
Src1[I] = std::numeric_limits<Ty>::max();
56+
check(ScalarFn, VectorFn, &Src1[0], N, "all-max");
57+
58+
// Check with all min values.
59+
for (unsigned I = 0; I != N; ++I)
60+
Src1[I] = std::numeric_limits<Ty>::min();
61+
check(ScalarFn, VectorFn, &Src1[0], N, "all-min");
62+
63+
// Check with various denormals.
64+
Src1[0] = std::numeric_limits<Ty>::denorm_min();
65+
for (unsigned I = 1; I != N; ++I)
66+
Src1[I] = std::numeric_limits<Ty>::denorm_min() / I;
67+
check(ScalarFn, VectorFn, &Src1[0], N, "denormals");
68+
69+
// Check with inputs all zero.
70+
for (unsigned I = 0; I != N; ++I)
71+
Src1[I] = 0.0;
72+
check(ScalarFn, VectorFn, &Src1[0], N, "all-zeros");
73+
74+
// Check with NaN at different indices.
75+
for (unsigned NaNIdx = 3; NaNIdx != 32; NaNIdx++) {
76+
for (unsigned I = 0; I != N; ++I)
77+
Src1[I] = 100;
78+
Src1[NaNIdx] = std::numeric_limits<Ty>::quiet_NaN();
79+
80+
check(ScalarFn, VectorFn, &Src1[0], N, "NaN");
81+
}
82+
83+
// Check with multiple signed-zeros at different positions.
84+
for (unsigned Idx = 0; Idx != 64; ++Idx) {
85+
for (unsigned I = 0; I != N; ++I)
86+
Src1[I] = -1.0;
87+
88+
for (unsigned Offset = 1; Offset != 32; ++Offset) {
89+
Src1[Idx] = -0.0;
90+
Src1[Idx + Offset] = +0.0;
91+
92+
check(ScalarFn, VectorFn, &Src1[0], N, "signed-zeros");
93+
}
94+
}
95+
96+
for (unsigned Idx = 0; Idx != 64; ++Idx) {
97+
for (unsigned I = 0; I != N; ++I)
98+
Src1[I] = -1.0;
99+
100+
for (unsigned Offset = 1; Offset != 32; ++Offset) {
101+
Src1[Idx] = +0.0;
102+
Src1[Idx + Offset] = -0.0;
103+
104+
check(ScalarFn, VectorFn, &Src1[0], N, "signed-zeros");
105+
}
106+
}
107+
108+
// Check with max value at all possible indices.
109+
for (unsigned Idx = 0; Idx != N; ++Idx) {
110+
for (unsigned I = 0; I != N; ++I)
111+
Src1[I] = I;
112+
113+
Src1[Idx] = N + 1;
114+
115+
check(ScalarFn, VectorFn, &Src1[0], N, "full");
116+
117+
for (unsigned Offset = 1; Offset != 16; ++Offset) {
118+
if (Idx + Offset < N)
119+
Src1[Idx + Offset] = N + 1;
120+
121+
check(ScalarFn, VectorFn, &Src1[0], N, "full");
122+
}
123+
}
124+
125+
// Check with NaN value at all possible indices.
126+
for (unsigned Idx = 0; Idx != N; ++Idx) {
127+
for (unsigned I = 0; I != N; ++I)
128+
Src1[I] = I;
129+
130+
Src1[Idx] = std::numeric_limits<float>::quiet_NaN();
131+
check(ScalarFn, VectorFn, &Src1[0], N, "full-with-nan");
132+
133+
// Check with multiple NaNs at different offsets.
134+
for (unsigned Offset = 1; Offset != 16; ++Offset) {
135+
if (Idx + Offset < N)
136+
Src1[Idx + Offset] = std::numeric_limits<float>::quiet_NaN();
137+
138+
check(ScalarFn, VectorFn, &Src1[0], N, "full-with-multiple-nan");
139+
}
140+
}
141+
142+
// Check with multiple infinity values at different positions.
143+
for (unsigned Idx = 0; Idx != 64; ++Idx) {
144+
for (unsigned I = 0; I != N; ++I)
145+
Src1[I] = -1.0;
146+
147+
for (unsigned Offset = 1; Offset != 16; ++Offset) {
148+
Src1[Idx] = -std::numeric_limits<float>::infinity();
149+
Src1[Idx + Offset] = std::numeric_limits<float>::infinity();
150+
151+
check(ScalarFn, VectorFn, &Src1[0], N, "infinity");
152+
}
153+
}
154+
}
155+
156+
int main(void) {
157+
rng = std::mt19937(15);
158+
159+
{
160+
DEFINE_SCALAR_AND_VECTOR_FN1_TYPE(
161+
float Max = -2.0;, for (unsigned I = 0; I < 1024;
162+
I++) { Max = std::fmax(Max, A[I]); } return Max;
163+
, float);
164+
checkVectorFunction<float>(ScalarFn, VectorFn, "fmaxnum_start_neg_2");
165+
}
166+
{
167+
DEFINE_SCALAR_AND_VECTOR_FN1_TYPE(
168+
float Max = std::numeric_limits<float>::min();
169+
, for (unsigned I = 0; I < 1024;
170+
I++) { Max = std::fmax(Max, A[I]); } return Max;
171+
, float);
172+
checkVectorFunction<float>(ScalarFn, VectorFn, "fmaxnum_start_min");
173+
}
174+
{
175+
DEFINE_SCALAR_AND_VECTOR_FN1_TYPE(
176+
float Max = std::numeric_limits<float>::denorm_min();
177+
, for (unsigned I = 0; I < 1024;
178+
I++) { Max = std::fmax(Max, A[I]); } return Max;
179+
, float);
180+
checkVectorFunction<float>(ScalarFn, VectorFn, "fmaxnum_start_denorm_min");
181+
}
182+
{
183+
DEFINE_SCALAR_AND_VECTOR_FN1_TYPE(
184+
float Max = std::numeric_limits<float>::quiet_NaN();
185+
, for (unsigned I = 0; I < 1024;
186+
I++) { Max = std::fmax(Max, A[I]); } return Max;
187+
, float);
188+
checkVectorFunction<float>(ScalarFn, VectorFn, "fmaxnum_start_is_nan");
189+
}
190+
191+
{
192+
DEFINE_SCALAR_AND_VECTOR_FN1_TYPE(
193+
float Max = -2.0;
194+
, for (unsigned I = 0; I < 1024;
195+
I++) { Max = A[I] > Max ? A[I] : Max; } return Max;
196+
, float);
197+
checkVectorFunction<float>(ScalarFn, VectorFn, "fmax_strict_start_neg_2");
198+
}
199+
{
200+
DEFINE_SCALAR_AND_VECTOR_FN1_TYPE(
201+
float Max = std::numeric_limits<float>::min();
202+
, for (unsigned I = 0; I < 1024;
203+
I++) { Max = A[I] > Max ? A[I] : Max; } return Max;
204+
, float);
205+
checkVectorFunction<float>(ScalarFn, VectorFn, "fmax_strict_start_min");
206+
}
207+
{
208+
DEFINE_SCALAR_AND_VECTOR_FN1_TYPE(
209+
float Max = std::numeric_limits<float>::denorm_min();
210+
, for (unsigned I = 0; I < 1025;
211+
I++) { Max = A[I] > Max ? A[I] : Max; } return Max;
212+
, float);
213+
checkVectorFunction<float>(ScalarFn, VectorFn,
214+
"fmax_strict_start_denorm_min");
215+
}
216+
{
217+
DEFINE_SCALAR_AND_VECTOR_FN1_TYPE(
218+
float Max = std::numeric_limits<float>::quiet_NaN();
219+
, for (unsigned I = 0; I < 1025;
220+
I++) { Max = A[I] > Max ? A[I] : Max; } return Max;
221+
, float);
222+
checkVectorFunction<float>(ScalarFn, VectorFn, "fmax_strict_start_nan");
223+
}
224+
225+
{
226+
DEFINE_SCALAR_AND_VECTOR_FN1_TYPE(
227+
float Max = -2.0;
228+
, for (unsigned I = 0; I < 1024;
229+
I++) { Max = Max >= A[I] ? Max : A[I]; } return Max;
230+
, float);
231+
checkVectorFunction<float>(ScalarFn, VectorFn,
232+
"fmax_non_strict_start_neg_2");
233+
}
234+
235+
{
236+
DEFINE_SCALAR_AND_VECTOR_FN1_TYPE(
237+
float Max = -2.0;
238+
, for (unsigned I = 0; I < 1024;
239+
I++) { Max = Max > A[I] ? Max : A[I]; } return Max;
240+
, float);
241+
checkVectorFunction<float>(ScalarFn, VectorFn,
242+
"fmax_cmp_max_gt_start_neg_2");
243+
}
244+
245+
{
246+
DEFINE_SCALAR_AND_VECTOR_FN1_TYPE(
247+
float Max = -2.0;
248+
, for (unsigned I = 0; I < 1024;
249+
I++) { Max = Max < A[I] ? A[I] : Max; } return Max;
250+
, float);
251+
checkVectorFunction<float>(ScalarFn, VectorFn,
252+
"fmax_cmp_max_lt_start_neg_2");
253+
}
254+
{
255+
DEFINE_SCALAR_AND_VECTOR_FN1_TYPE(
256+
float Max = std::numeric_limits<float>::denorm_min();
257+
, for (unsigned I = 0; I < 1024;
258+
I++) { Max = Max < A[I] ? A[I] : Max; } return Max;
259+
, float);
260+
checkVectorFunction<float>(ScalarFn, VectorFn,
261+
"fmax_cmp_max_lt_start_denorm_min");
262+
}
263+
{
264+
DEFINE_SCALAR_AND_VECTOR_FN1_TYPE(
265+
float Max = std::numeric_limits<float>::quiet_NaN();
266+
, for (unsigned I = 0; I < 1024;
267+
I++) { Max = Max < A[I] ? A[I] : Max; } return Max;
268+
, float);
269+
checkVectorFunction<float>(ScalarFn, VectorFn,
270+
"fmax_cmp_max_lt_start_neg_nan");
271+
}
272+
273+
return 0;
274+
}
Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,14 @@
1+
Checking fmaxnum_start_neg_2
2+
Checking fmaxnum_start_min
3+
Checking fmaxnum_start_denorm_min
4+
Checking fmaxnum_start_is_nan
5+
Checking fmax_strict_start_neg_2
6+
Checking fmax_strict_start_min
7+
Checking fmax_strict_start_denorm_min
8+
Checking fmax_strict_start_nan
9+
Checking fmax_non_strict_start_neg_2
10+
Checking fmax_cmp_max_gt_start_neg_2
11+
Checking fmax_cmp_max_lt_start_neg_2
12+
Checking fmax_cmp_max_lt_start_denorm_min
13+
Checking fmax_cmp_max_lt_start_neg_nan
14+
exit 0

0 commit comments

Comments
 (0)