@@ -930,9 +930,9 @@ bvt bv_utilst::dadda_tree(const std::vector<bvt> &pps)
930
930
// while not regressing substantially in the matrix of different benchmarks and
931
931
// CaDiCaL and MiniSat2 as solvers.
932
932
// #define RADIX_MULTIPLIER 8
933
- // #define USE_KARATSUBA
933
+ #define USE_KARATSUBA
934
934
// #define USE_TOOM_COOK
935
- #define USE_SCHOENHAGE_STRASSEN
935
+ // #define USE_SCHOENHAGE_STRASSEN
936
936
#ifdef RADIX_MULTIPLIER
937
937
# define DADDA_TREE
938
938
#endif
@@ -1828,6 +1828,61 @@ bvt bv_utilst::unsigned_multiplier(const bvt &_op0, const bvt &_op1)
1828
1828
}
1829
1829
}
1830
1830
1831
+ bvt bv_utilst::unsigned_karatsuba_full_multiplier (const bvt &op0, const bvt &op1)
1832
+ {
1833
+ // We review symbolic encoding of multiplication in context of sw
1834
+ // verification, bit width is 2^n, distinguish truncating (x mod 2^2^n) from
1835
+ // double-output-width multiplication, truncating Karatsuba is 2 truncating
1836
+ // half-width multiplication plus one double-output-width of half width, for
1837
+ // double output width Karatsuba idea is challenge to avoid width extension,
1838
+ // check Wikipedia edit history
1839
+
1840
+ PRECONDITION (op0.size () == op1.size ());
1841
+ const std::size_t op_size = op0.size ();
1842
+ PRECONDITION (op_size > 0 );
1843
+ PRECONDITION ((op_size & (op_size - 1 )) != 0 );
1844
+
1845
+ if (op_size == 1 )
1846
+ return {prop.land (_op0[0 ], _op1[0 ]), const_literal (false )};
1847
+
1848
+ const std::size_t half_op_size = op_size >> 1 ;
1849
+
1850
+ bvt x0{op0.begin (), op0.begin () + half_op_size};
1851
+ bvt x1{op0.begin () + half_op_size, op0.end ()};
1852
+
1853
+ bvt y0{op1.begin (), op1.begin () + half_op_size};
1854
+ bvt y1{op1.begin () + half_op_size, op1.end ()};
1855
+
1856
+ bvt z0 = unsigned_karatsuba_full_multiplier (x0, y0);
1857
+ bvt z2 = unsigned_karatsuba_full_multiplier (x1, y1);
1858
+
1859
+ bvt x0_sub = zero_extension (x0, half_op_size + 1 );
1860
+ bvt x1_sub = zero_extension (x1, half_op_size + 1 );
1861
+
1862
+ bvt y0_sub = zero_extension (y0, half_op_size + 1 );
1863
+ bvt y1_sub = zero_extension (y1, half_op_size + 1 );
1864
+
1865
+ bvt x1_minus_x0_ext = sub (x1_sub, x0_sub);
1866
+ literalt x1_minus_x0_sign = sign_bit (x1_minus_x0_ext);
1867
+ bvt x1_minus_x0_abs = absolute_value (x1_minus_x0_ext);
1868
+ x1_minus_x0_abs.pop_back ();
1869
+ bvt y0_minus_y1_ext = sub (y0_sub, y1_sub);
1870
+ literalt y0_minus_y1_sign = sign_bit (y0_minus_y1_ext);
1871
+ bvt y0_minus_y1_abs = absolute_value (y0_minus_y1_ext);
1872
+ y0_minus_y1_ext.pop_back ();
1873
+ bvt sub_mult =
1874
+ unsigned_karatsuba_full_multiplier (x1_minus_x0_abs, y0_minus_y1_abs);
1875
+ bvt sub_mult_ext = zero_extension (sub_mult, op_size + 1 );
1876
+ bvt z1_ext = add_sub (zero_extension (add (z0, z2), op_size + 1 ), sub_mult_ext,
1877
+ prop.lxor (x1_minus_x0_sign, y0_minus_y1_sign));
1878
+
1879
+ bvt z0_full = zero_extension (z0, op_size << 1 );
1880
+ bvt z1_full = zero_extension (concatenate (zeros (half_op_size), z1_ext), op_size << 1 );
1881
+ bvt z2_full = concatenate (zeros (op_size), z2);
1882
+
1883
+ return add (add (z0_full, z1_full), z2_full);
1884
+ }
1885
+
1831
1886
bvt bv_utilst::unsigned_karatsuba_multiplier (const bvt &_op0, const bvt &_op1)
1832
1887
{
1833
1888
if (_op0.size () != _op1.size ())
@@ -1838,41 +1893,51 @@ bvt bv_utilst::unsigned_karatsuba_multiplier(const bvt &_op0, const bvt &_op1)
1838
1893
if (op_size == 0 || (op_size & (op_size - 1 )) != 0 )
1839
1894
return unsigned_multiplier (_op0, _op1);
1840
1895
1896
+ if (op_size == 1 )
1897
+ return {prop.land (_op0[0 ], _op1[0 ])};
1898
+
1841
1899
const std::size_t half_op_size = op_size >> 1 ;
1842
1900
1843
- // The need to use a full multiplier for z_0 means that we will not actually
1844
- // accomplish a reduction in bit width.
1901
+ // We split each of the operands in half and treat them as coefficients of a
1902
+ // polynomial a * 2^half_op_size + b. Straightforward polynomial
1903
+ // multiplication then yields
1904
+ // a0 * a1 * 2^op_size + (a0 * b1 + a1 * b0) * 2^half_op_size + b0 * b1
1905
+ // These would be four multiplications (the operands of which have half the
1906
+ // original bit width):
1907
+ // z0 = b0 * b1
1908
+ // z1 = a0 * b1 + a1 * b0
1909
+ // z2 = a0 * a1
1910
+ // Karatsuba's insight is that these four multiplications can be expressed
1911
+ // using just three multiplications:
1912
+ // z1 = (a0 - b0) * (b1 - a1) + z0 + z2
1913
+ //
1914
+ // Worked 4-bit example, 4-bit result:
1915
+ // abcd * efgh -> 4-bit result
1916
+ // cd * gh -> 4-bit result
1917
+ // cd * ef -> 2-bit result
1918
+ // ab * gh -> 2-bit result
1919
+ // d * h -> 2-bit result
1920
+ // c * g -> 2-bit result
1921
+ // (c - d) * (h - g) + dh + cg; use an extra sign bit for each of the
1922
+ // subtractions, and conditionally negate the product by xor-ing those sign
1923
+ // bits; dh + cg is a 2-bit addition (with possible results 0, 1, 2); the
1924
+ // product has possible values (-1, 0, 1); the final sum cannot evaluate to -1
1925
+ // as
1926
+ // * c=1, d=0, h=0, g=1 (1 * -1) implies cg=1
1927
+ // * c=0, d=1, h=1, g=0 (-1 * 1) implies dh=1
1928
+ // Therefore, after adding (dh + cg) the multiplication can safely be added
1929
+ // over just 2 bits.
1930
+
1845
1931
bvt x0{_op0.begin (), _op0.begin () + half_op_size};
1846
- x0.resize (op_size, const_literal (false ));
1847
1932
bvt x1{_op0.begin () + half_op_size, _op0.end ()};
1848
- // x1.resize(op_size, const_literal(false));
1849
1933
bvt y0{_op1.begin (), _op1.begin () + half_op_size};
1850
- y0.resize (op_size, const_literal (false ));
1851
1934
bvt y1{_op1.begin () + half_op_size, _op1.end ()};
1852
- // y1.resize(op_size, const_literal(false));
1853
-
1854
- bvt z0 = unsigned_multiplier (x0, y0);
1855
- bvt z2 = unsigned_karatsuba_multiplier (x1, y1);
1856
-
1857
- bvt z0_half{z0.begin (), z0.begin () + half_op_size};
1858
- bvt z2_plus_z0 = add (z2, z0_half);
1859
- z2_plus_z0.resize (half_op_size);
1860
-
1861
- bvt x0_half{x0.begin (), x0.begin () + half_op_size};
1862
- bvt xdiff = add (x0_half, x1);
1863
- // xdiff.resize(half_op_size);
1864
- bvt y0_half{y0.begin (), y0.begin () + half_op_size};
1865
- bvt ydiff = add (y1, y0_half);
1866
- // ydiff.resize(half_op_size);
1867
-
1868
- bvt z1 = sub (unsigned_karatsuba_multiplier (xdiff, ydiff), z2_plus_z0);
1869
- for (std::size_t i = 0 ; i < half_op_size; ++i)
1870
- z1.insert (z1.begin (), const_literal (false ));
1871
- // result.insert(result.end(), z1.begin(), z1.end());
1872
-
1873
- // z1.resize(op_size);
1874
- z0.resize (op_size);
1875
- return add (z0, z1);
1935
+
1936
+ bvt z0 = unsigned_karatsuba_full_multiplier (x0, y0);
1937
+ bvt z1 = add (unsigned_karatsuba_multiplier (x1, y0), unsigned_karatsuba_multiplier (x0, y1));
1938
+ bvt z1_full = concatenate (zeros (half_op_size, z1));
1939
+
1940
+ return add (z0, z1_full);
1876
1941
}
1877
1942
1878
1943
bvt bv_utilst::unsigned_toom_cook_multiplier (const bvt &_op0, const bvt &_op1)
0 commit comments