@@ -8,6 +8,8 @@
 
 #include "bv_utils.h"
 
+#include <util/arith_tools.h>
+
 #include <list>
 #include <utility>
 
@@ -783,7 +785,7 @@ bvt bv_utilst::dadda_tree(const std::vector<bvt> &pps)
 // trees (and also the default addition scheme), but isn't consistently more
 // performant with simple partial-product generation. Only when using
 // higher-radix multipliers does the combination appear to perform better.
-#define DADDA_TREE
+// #define DADDA_TREE
 
 // The following examples demonstrate the performance differences (with a
 // time-out of 7200 seconds):
@@ -926,7 +928,8 @@ bvt bv_utilst::dadda_tree(const std::vector<bvt> &pps)
 // while not regressing substantially in the matrix of different benchmarks and
 // CaDiCaL and MiniSat2 as solvers.
 // #define RADIX_MULTIPLIER 8
-#define USE_KARATSUBA
+// #define USE_KARATSUBA
+#define USE_TOOM_COOK
 #ifdef RADIX_MULTIPLIER
 # define DADDA_TREE
 #endif
@@ -1948,6 +1951,178 @@ bvt bv_utilst::unsigned_karatsuba_multiplier(const bvt &_op0, const bvt &_op1)
   return add(z0, z1_full);
 }
 
+bvt bv_utilst::unsigned_toom_cook_multiplier(const bvt &_op0, const bvt &_op1)
+{
+  PRECONDITION(!_op0.empty());
+  PRECONDITION(!_op1.empty());
+
+  if(_op1.size() == 1)
+    return unsigned_multiplier(_op0, _op1);
+
+  // break up _op0, _op1 into groups of at most GROUP_SIZE bits
+  PRECONDITION(_op0.size() == _op1.size());
+#define GROUP_SIZE 8
+  const std::size_t d_bits =
+    2 * GROUP_SIZE +
+    2 * address_bits((_op0.size() + GROUP_SIZE - 1) / GROUP_SIZE);
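+  // d_bits is the width of one digit of the product polynomial: a product of
+  // two GROUP_SIZE-bit groups takes 2 * GROUP_SIZE bits, and the remaining
+  // bits are headroom for sums of up to ceil(n / GROUP_SIZE) such products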
+  std::vector<bvt> a, b, c_ops, d;
+  for(std::size_t i = 0; i < _op0.size(); i += GROUP_SIZE)
+  {
+    std::size_t u = std::min(i + GROUP_SIZE, _op0.size());
+    a.emplace_back(_op0.begin() + i, _op0.begin() + u);
+    b.emplace_back(_op1.begin() + i, _op1.begin() + u);
+
+    c_ops.push_back(zeros(i));
+    d.push_back(prop.new_variables(d_bits));
+    c_ops.back().insert(c_ops.back().end(), d.back().begin(), d.back().end());
+    c_ops.back() = zero_extension(c_ops.back(), _op0.size());
+  }
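+  // multiplying two polynomials of degree m - 1 yields 2m - 1 digits; digits
+  // m .. 2m - 2 only affect bits beyond the result width, but still need
+  // fresh variables so the evaluation constraints below remain satisfiable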
+  for(std::size_t i = a.size(); i < 2 * a.size() - 1; ++i)
+  {
+    d.push_back(prop.new_variables(d_bits));
+  }
+
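+  // constrain the digits d[i] via 2m - 1 evaluations of the product
+  // polynomial r = A * B: at x = 0 here, at x = +/-2^(j-1) in the loop below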
+  // r(0)
+  bvt r_0 = d[0];
+  prop.l_set_to_true(equal(
+    r_0,
+    unsigned_multiplier(
+      zero_extension(a[0], r_0.size()), zero_extension(b[0], r_0.size()))));
+
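+  // each iteration contributes the pair of evaluation points x = +/-2^(j-1)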
+  for(std::size_t j = 1; j < a.size(); ++j)
+  {
+    // r(2^(j-1))
+    bvt r_j = zero_extension(
+      d[0], std::min(_op0.size(), d[0].size() + (j - 1) * (d.size() - 1)));
+    for(std::size_t i = 1; i < d.size(); ++i)
+    {
+      r_j = add(
+        r_j,
+        shift(
+          zero_extension(d[i], r_j.size()), shiftt::SHIFT_LEFT, (j - 1) * i));
+    }
+
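+    // split A into even- and odd-indexed coefficients such that, with
+    // x = 2^(j-1), A(x) = a_even + x * a_odd; a_odd is pre-scaled by
+    // x^(i-1), an even power, so it is unchanged at -x; likewise for B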
+    bvt a_even = zero_extension(a[0], r_j.size());
+    for(std::size_t i = 2; i < a.size(); i += 2)
+    {
+      a_even = add(
+        a_even,
+        shift(
+          zero_extension(a[i], a_even.size()),
+          shiftt::SHIFT_LEFT,
+          (j - 1) * i));
+    }
+    bvt a_odd = zero_extension(a[1], r_j.size());
+    for(std::size_t i = 3; i < a.size(); i += 2)
+    {
+      a_odd = add(
+        a_odd,
+        shift(
+          zero_extension(a[i], a_odd.size()),
+          shiftt::SHIFT_LEFT,
+          (j - 1) * (i - 1)));
+    }
+    bvt b_even = zero_extension(b[0], r_j.size());
+    for(std::size_t i = 2; i < b.size(); i += 2)
+    {
+      b_even = add(
+        b_even,
+        shift(
+          zero_extension(b[i], b_even.size()),
+          shiftt::SHIFT_LEFT,
+          (j - 1) * i));
+    }
+    bvt b_odd = zero_extension(b[1], r_j.size());
+    for(std::size_t i = 3; i < b.size(); i += 2)
+    {
+      b_odd = add(
+        b_odd,
+        shift(
+          zero_extension(b[i], b_odd.size()),
+          shiftt::SHIFT_LEFT,
+          (j - 1) * (i - 1)));
+    }
+
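+    // constrain r(2^(j-1)) = A(2^(j-1)) * B(2^(j-1))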
+    prop.l_set_to_true(equal(
+      r_j,
+      unsigned_multiplier(
+        add(a_even, shift(a_odd, shiftt::SHIFT_LEFT, j - 1)),
+        add(b_even, shift(b_odd, shiftt::SHIFT_LEFT, j - 1)))));
+
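+    // at x = -2^(j-1) the odd-indexed digits enter with negative sign,
+    // since (-x)^i = -x^i for odd i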
+    // r(-2^(j-1))
+    bvt r_minus_j = zero_extension(
+      d[0], std::min(_op0.size(), d[0].size() + (j - 1) * (d.size() - 1)));
+    for(std::size_t i = 1; i < d.size(); ++i)
+    {
+      if(i % 2 == 1)
+      {
+        r_minus_j = sub(
+          r_minus_j,
+          shift(
+            zero_extension(d[i], r_minus_j.size()),
+            shiftt::SHIFT_LEFT,
+            (j - 1) * i));
+      }
+      else
+      {
+        r_minus_j = add(
+          r_minus_j,
+          shift(
+            zero_extension(d[i], r_minus_j.size()),
+            shiftt::SHIFT_LEFT,
+            (j - 1) * i));
+      }
+    }
+
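+    // constrain r(-2^(j-1)) = A(-2^(j-1)) * B(-2^(j-1)), where
+    // A(-x) = a_even - x * a_odd, and likewise for B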
+    prop.l_set_to_true(equal(
+      r_minus_j,
+      unsigned_multiplier(
+        sub(a_even, shift(a_odd, shiftt::SHIFT_LEFT, j - 1)),
+        sub(b_even, shift(b_odd, shiftt::SHIFT_LEFT, j - 1)))));
+  }
+
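+  // the product is the sum over i of d[i] << (i * GROUP_SIZE), truncated to
+  // the operand width; these shifted digits were accumulated in c_ops above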
+  if(c_ops.empty())
+    return zeros(_op0.size());
+  else
+  {
+#ifdef WALLACE_TREE
+    return wallace_tree(c_ops);
+#elif defined(DADDA_TREE)
+    return dadda_tree(c_ops);
+#else
+    bvt product = c_ops.front();
+
+    for(auto it = std::next(c_ops.begin()); it != c_ops.end(); ++it)
+      product = add(product, *it);
+
+    return product;
+#endif
+  }
+}
+
 bvt bv_utilst::unsigned_multiplier_no_overflow(
   const bvt &op0,
   const bvt &op1)
@@ -2000,6 +2175,8 @@ bvt bv_utilst::signed_multiplier(const bvt &op0, const bvt &op1)
 
 #ifdef USE_KARATSUBA
   bvt result = unsigned_karatsuba_multiplier(neg0, neg1);
+#elif defined(USE_TOOM_COOK)
+  bvt result = unsigned_toom_cook_multiplier(neg0, neg1);
 #else
   bvt result=unsigned_multiplier(neg0, neg1);
 #endif
@@ -2073,6 +2250,9 @@ bvt bv_utilst::multiplier(
 #ifdef USE_KARATSUBA
   case representationt::UNSIGNED:
     return unsigned_karatsuba_multiplier(op0, op1);
+#elif defined(USE_TOOM_COOK)
+  case representationt::UNSIGNED:
+    return unsigned_toom_cook_multiplier(op0, op1);
 #else
   case representationt::UNSIGNED: return unsigned_multiplier(op0, op1);
 #endif