99#include " axom/core/numerics/Matrix.hpp"
1010#include " axom/core/numerics/Determinants.hpp"
1111
12+ #include " axom/fmt.hpp"
13+
14+ #include < cmath>
15+ #include < cstdint>
16+ #include < cstring>
17+ #include < iostream>
18+ #include < limits>
19+ #include < random>
20+
21+ namespace
22+ {
23+
24+ /* *
25+ * \brief Returns the number of IEEE-754 double-precision representable values
26+ * (ULPs) separating \p a and \p b.
27+ *
28+ * Computes the absolute difference in their ordered bit representations.
29+ * Assumes IEEE-754 64-bit floats
30+ * Returns max uint64_t if either argument is NaN; and 0 for equal params (e.g. +0 and -0).
31+ */
32+ std::uint64_t ulps_apart (double a, double b)
33+ {
34+ static_assert (sizeof (double ) == sizeof (std::uint64_t ), " unexpected double size" );
35+ static_assert (std::numeric_limits<double >::is_iec559, " requires IEEE-754" );
36+
37+ if (std::isnan (a) || std::isnan (b))
38+ {
39+ return std::numeric_limits<std::uint64_t >::max ();
40+ }
41+
42+ // Treat +0 and -0 (and any equal values) as distance 0
43+ if (a == b)
44+ {
45+ return 0 ;
46+ }
47+
48+ // Order-preserving bit transform for ULP comparison.
49+ auto to_ordered = [](double x) -> std::uint64_t {
50+ std::uint64_t bits = 0 ;
51+ std::memcpy (&bits, &x, sizeof (bits));
52+ constexpr std::uint64_t sign = std::uint64_t {1 } << 63 ;
53+ return (bits & sign) ? ~bits : (bits + sign);
54+ };
55+
56+ const std::uint64_t oa = to_ordered (a);
57+ const std::uint64_t ob = to_ordered (b);
58+ return (oa > ob) ? (oa - ob) : (ob - oa);
59+ }
60+
61+ // "raw" implementation of 2x2 determinant for comparisons to axom::utilities::determinant
62+ // uses `volatile` in an attempt to allide compiler optimization that internally use fma calculations
63+ double det2_raw (double a00, double a01, double a10, double a11)
64+ {
65+ volatile double p0 = a00 * a11;
66+ volatile double p1 = a01 * a10;
67+ return p0 - p1;
68+ }
69+
70+ // "raw" implementation of 3x3 determinant for comparisons to axom::utilities::determinant
71+ // uses `volatile` in an attempt to allide compiler optimization that internally use fma calculations
72+ double det3_raw (double a00,
73+ double a01,
74+ double a02,
75+ double a10,
76+ double a11,
77+ double a12,
78+ double a20,
79+ double a21,
80+ double a22)
81+ {
82+ volatile double m01 = a11 * a22 - a12 * a21;
83+ volatile double m02 = a10 * a22 - a12 * a20;
84+ volatile double m12 = a10 * a21 - a11 * a20;
85+
86+ volatile double t0 = a00 * m01;
87+ volatile double t1 = a01 * m02;
88+ volatile double t2 = a02 * m12;
89+ return (t0 - t1) + t2;
90+ }
91+ } // namespace
92+
1293TEST (numerics_determinants, determinant_of_In)
1394{
1495 const int N = 25 ;
@@ -27,34 +108,260 @@ TEST(numerics_determinants, determinant5x5)
27108 const int N = 5 ;
28109 const double EPS = 1e-11 ;
29110
111+ // clang-format off
30112 axom::numerics::Matrix<double > A (N, N);
31- A (0 , 0 ) = 1 ;
32- A (0 , 1 ) = 2 ;
33- A (0 , 2 ) = 4 ;
34- A (0 , 3 ) = 3 ;
35- A (0 , 4 ) = 0 ;
36- A (1 , 0 ) = 2 ;
37- A (1 , 1 ) = 1 ;
38- A (1 , 2 ) = -1 ;
39- A (1 , 3 ) = 1 ;
40- A (1 , 4 ) = 3 ;
41- A (2 , 0 ) = 4 ;
42- A (2 , 1 ) = -1 ;
43- A (2 , 2 ) = -2 ;
44- A (2 , 3 ) = 5 ;
45- A (2 , 4 ) = 1 ;
46- A (3 , 0 ) = 7 ;
47- A (3 , 1 ) = 3 ;
48- A (3 , 2 ) = 6 ;
49- A (3 , 3 ) = 2 ;
50- A (3 , 4 ) = 1 ;
51- A (4 , 0 ) = 1 ;
52- A (4 , 1 ) = 0 ;
53- A (4 , 2 ) = -1 ;
54- A (4 , 3 ) = 1 ;
55- A (4 , 4 ) = 1 ;
113+ A (0 , 0 ) = 1 ; A (0 , 1 ) = 2 ; A (0 , 2 ) = 4 ; A (0 , 3 ) = 3 ; A (0 , 4 ) = 0 ;
114+ A (1 , 0 ) = 2 ; A (1 , 1 ) = 1 ; A (1 , 2 ) = -1 ; A (1 , 3 ) = 1 ; A (1 , 4 ) = 3 ;
115+ A (2 , 0 ) = 4 ; A (2 , 1 ) = -1 ; A (2 , 2 ) = -2 ; A (2 , 3 ) = 5 ; A (2 , 4 ) = 1 ;
116+ A (3 , 0 ) = 7 ; A (3 , 1 ) = 3 ; A (3 , 2 ) = 6 ; A (3 , 3 ) = 2 ; A (3 , 4 ) = 1 ;
117+ A (4 , 0 ) = 1 ; A (4 , 1 ) = 0 ; A (4 , 2 ) = -1 ; A (4 , 3 ) = 1 ; A (4 , 4 ) = 1 ;
118+ // clang-format on
56119
57120 double computed_det = axom::numerics::determinant (A);
58121 double expected_det = -34.0 ;
59122 EXPECT_NEAR (expected_det, computed_det, EPS);
60123}
124+
125+ // ------------------------------------------------------------------------------
126+ TEST (numerics_determinants, check_ulps_apart_function)
127+ {
128+ constexpr auto zero = std::uint64_t {0 };
129+ constexpr auto one = std::uint64_t {1 };
130+ constexpr auto two = std::uint64_t {2 };
131+ constexpr auto three = std::uint64_t {3 };
132+
133+ // test identity/equality
134+ {
135+ EXPECT_EQ (ulps_apart (1 ., 1 .), zero);
136+ EXPECT_EQ (ulps_apart (-0 ., +0 .), zero);
137+ }
138+
139+ // test values around 1
140+ {
141+ const double x = 1 .;
142+ const double up = std::nextafter (x, x + 1 .); // next ulp in positive direction
143+ const double down = std::nextafter (x, x - 1 .); // nex ulp in negative direction
144+ EXPECT_EQ (ulps_apart (x, up), one);
145+ EXPECT_EQ (ulps_apart (x, down), one);
146+ EXPECT_EQ (ulps_apart (down, up), two);
147+
148+ // check symmetric
149+ EXPECT_EQ (ulps_apart (x, up), ulps_apart (up, x));
150+
151+ // check a few increasing values
152+ double y = x;
153+ for (int k = 1 ; k <= 10 ; ++k)
154+ {
155+ y = nextafter (y, y + 1 .);
156+ EXPECT_EQ (ulps_apart (x, y), static_cast <uint64_t >(k));
157+ }
158+ }
159+
160+ // test values around zero
161+ {
162+ const double ms = std::numeric_limits<double >::denorm_min ();
163+ const double nms = -ms;
164+ EXPECT_EQ (ulps_apart (nms, -0.0 ), one);
165+ EXPECT_EQ (ulps_apart (+0.0 , ms), one);
166+ EXPECT_EQ (ulps_apart (nms, ms), three); // -0 to +0 still counts as one ulp
167+
168+ const double up = std::nextafter (ms, 1 .);
169+ EXPECT_EQ (ulps_apart (ms, up), one);
170+
171+ const double down = std::nextafter (nms, -1 .);
172+ EXPECT_EQ (ulps_apart (nms, down), one);
173+ }
174+
175+ // test inf
176+ {
177+ const double mx = std::numeric_limits<double >::max ();
178+ const double inf = std::numeric_limits<double >::infinity ();
179+ EXPECT_EQ (ulps_apart (mx, nextafter (mx, inf)), one);
180+ EXPECT_EQ (nextafter (mx, inf), inf);
181+ EXPECT_EQ (ulps_apart (mx, inf), one);
182+
183+ const double ninf = -inf;
184+ const double nmx = -mx;
185+ EXPECT_EQ (nextafter (nmx, ninf), ninf);
186+ EXPECT_EQ (ulps_apart (nmx, ninf), one);
187+ }
188+
189+ // test NaN
190+ {
191+ constexpr uint64_t UMAX = std::numeric_limits<std::uint64_t >::max ();
192+ const double qnan = std::numeric_limits<double >::quiet_NaN ();
193+ EXPECT_EQ (ulps_apart (qnan, 1.0 ), UMAX);
194+ EXPECT_EQ (ulps_apart (1.0 , qnan), UMAX);
195+ EXPECT_EQ (ulps_apart (qnan, qnan), UMAX);
196+ }
197+ }
198+
199+ // ------------------------------------------------------------------------------
200+ TEST (numerics_determinants, determinant_fma_reduces_ulp_for_cancellation)
201+ {
202+ // Choose values that are exactly representable in double, but whose products are not.
203+ // This forces rounding in intermediate products, and the determinant computation is sensitive to cancellation.
204+ //
205+ // Let b=c=2^27, a=2^27+1, d=2^27-1.
206+ // Exact det = a*d - b*c = (2^54 - 1) - 2^54 = -1.
207+ const double base = std::ldexp (1.0 , 27 );
208+ const double a = base + 1.0 ;
209+ const double b = base;
210+ const double c = base;
211+ const double d = base - 1.0 ;
212+ constexpr double expected = -1.0 ;
213+
214+ {
215+ const double det_fma = axom::numerics::determinant (a, b, c, d);
216+ const double det_raw = det2_raw (a, b, c, d);
217+
218+ const auto ulp_fma = ulps_apart (det_fma, expected);
219+ const auto ulp_raw = ulps_apart (det_raw, expected);
220+ axom::fmt::print (
221+ " 2x2 cancellation: expected={} det_fma={} det_raw={} (ulps from expected: fma={} raw={})\n " ,
222+ expected,
223+ det_fma,
224+ det_raw,
225+ ulp_fma,
226+ ulp_raw);
227+
228+ EXPECT_DOUBLE_EQ (expected, det_fma);
229+ EXPECT_LE (ulp_fma, ulp_raw);
230+ }
231+
232+ // Embed the same 2x2 cancellation into a 3x3 determinant.
233+ {
234+ const double det_fma = axom::numerics::determinant (a, b, 0.0 , c, d, 0.0 , 0.0 , 0.0 , 1.0 );
235+ const double det_raw = det3_raw (a, b, 0.0 , c, d, 0.0 , 0.0 , 0.0 , 1.0 );
236+
237+ const auto ulp_fma = ulps_apart (det_fma, expected);
238+ const auto ulp_raw = ulps_apart (det_raw, expected);
239+
240+ axom::fmt::print (
241+ " 3x3 cancellation: expected={} det_fma={} det_raw={} (ulps from expected: fma={} raw={})\n " ,
242+ expected,
243+ det_fma,
244+ det_raw,
245+ ulp_fma,
246+ ulp_raw);
247+
248+ EXPECT_DOUBLE_EQ (expected, det_fma);
249+ EXPECT_LE (ulp_fma, ulp_raw);
250+ }
251+ }
252+
253+ // ------------------------------------------------------------------------------
254+ TEST (numerics_determinants, determinant_fma_reduces_ulp_statistics_2x2)
255+ {
256+ #if !defined(__SIZEOF_INT128__)
257+ GTEST_SKIP () << " Test requires __int128 for an exact integer reference determinant." ;
258+ #else
259+ // Use many near-cancellation 2x2 determinants with integer inputs.
260+ // Compare the ULP error against the correctly-rounded double value of the exact integer determinant.
261+
262+ constexpr std::int64_t base = (1LL << 27 ); // exactly representable as double
263+ constexpr int SAMPLES = 5000 ;
264+
265+ std::mt19937_64 rng (42ULL );
266+ std::uniform_int_distribution<std::int64_t > delta_dist (-2048 , 2048 );
267+
268+ long double sum_raw_ulp = 0 .0L ;
269+ long double sum_fma_ulp = 0 .0L ;
270+ long double sum_delta = 0 .0L ;
271+
272+ std::uint64_t max_raw_ulp = 0 ;
273+ std::uint64_t max_fma_ulp = 0 ;
274+ int better = 0 ;
275+ int worse = 0 ;
276+ int tied = 0 ;
277+
278+ for (int i = 0 ; i < SAMPLES; ++i)
279+ {
280+ // use some explicit test cases, or random numbers otherwise
281+ std::int64_t da {}, dd {};
282+ switch (i)
283+ {
284+ case 0 :
285+ da = 1 .;
286+ dd = -1 .;
287+ break ;
288+ case 1 :
289+ da = 2048 ;
290+ dd = -2048 ;
291+ break ;
292+ case 2 :
293+ da = 2048 ;
294+ dd = 2028 ;
295+ break ;
296+ default :
297+ da = delta_dist (rng);
298+ dd = delta_dist (rng);
299+ break ;
300+ }
301+
302+ // actual determinant should be an integer: (a_i * d_i) - (b_i * c_i)
303+ const std::int64_t a_i = base + da;
304+ const std::int64_t b_i = base;
305+ const std::int64_t c_i = base;
306+ const std::int64_t d_i = base + dd;
307+
308+ const __int128 exact = //
309+ static_cast <__int128>(a_i) * static_cast <__int128>(d_i) -
310+ static_cast <__int128>(b_i) * static_cast <__int128>(c_i);
311+ const double ref = static_cast <double >(exact);
312+
313+ const double a = static_cast <double >(a_i);
314+ const double b = static_cast <double >(b_i);
315+ const double c = static_cast <double >(c_i);
316+ const double d = static_cast <double >(d_i);
317+
318+ const double det_fma = axom::numerics::determinant (a, b, c, d);
319+ const double det_raw = det2_raw (a, b, c, d);
320+
321+ const std::uint64_t ulp_fma = ulps_apart (det_fma, ref);
322+ const std::uint64_t ulp_raw = ulps_apart (det_raw, ref);
323+
324+ sum_fma_ulp += static_cast <long double >(ulp_fma);
325+ sum_raw_ulp += static_cast <long double >(ulp_raw);
326+ sum_delta += static_cast <long double >(ulp_raw) - static_cast <long double >(ulp_fma);
327+
328+ max_fma_ulp = std::max (max_fma_ulp, ulp_fma);
329+ max_raw_ulp = std::max (max_raw_ulp, ulp_raw);
330+
331+ if (ulp_fma < ulp_raw)
332+ {
333+ ++better;
334+ }
335+ else if (ulp_fma > ulp_raw)
336+ {
337+ ++worse;
338+ }
339+ else
340+ {
341+ ++tied;
342+ }
343+ }
344+
345+ const long double mean_fma = sum_fma_ulp / static_cast <long double >(SAMPLES);
346+ const long double mean_raw = sum_raw_ulp / static_cast <long double >(SAMPLES);
347+ const long double mean_delta = sum_delta / static_cast <long double >(SAMPLES);
348+
349+ axom::fmt::print (
350+ " 2x2 ULP stats for {} samples\n "
351+ " \t mean_fma={} mean_raw={} mean_delta(raw-fma)={}\n "
352+ " \t max_fma={} max_raw={}\n "
353+ " \t better={} worse={} tied={}\n " ,
354+ SAMPLES,
355+ static_cast <double >(mean_fma),
356+ static_cast <double >(mean_raw),
357+ static_cast <double >(mean_delta),
358+ max_fma_ulp,
359+ max_raw_ulp,
360+ better,
361+ worse,
362+ tied);
363+
364+ EXPECT_GE (mean_delta, 0 .0L );
365+ EXPECT_GE (better, worse);
366+ #endif
367+ }
0 commit comments