diff --git a/clasp/shared_context.h b/clasp/shared_context.h index cc54fa5..d19e2b6 100644 --- a/clasp/shared_context.h +++ b/clasp/shared_context.h @@ -453,7 +453,7 @@ class Distributor { uint32 types : 3; /*!< Restrict distribution to these types. */ }; static uint64 mask(uint32 i) { return uint64(1) << i; } - static uint32 initSet(uint32 sz) { return (uint64(1) << sz) - 1; } + /* Set with bits [0, sz) set; sz >= 64 yields UINT64_MAX rather than shifting a 64-bit value by 64 or more. */ static uint64 initSet(uint32 sz) { return sz < 64 ? (uint64(1) << sz) - 1 : UINT64_MAX; } static bool inSet(uint64 s, uint32 id) { return (s & mask(id)) != 0; } explicit Distributor(const Policy& p); virtual ~Distributor(); diff --git a/src/parallel_solve.cpp b/src/parallel_solve.cpp index 7a28ab3..38f94e0 100644 --- a/src/parallel_solve.cpp +++ b/src/parallel_solve.cpp @@ -965,24 +965,23 @@ uint64 ParallelSolveOptions::initPeerMask(uint32 id, Integration::Topology topo, uint32 next = (id + 1) % maxT; return Distributor::mask(prev) | Distributor::mask(next); } - bool ext = topo == Integration::topo_cubex; - uint32 n = maxT; - uint32 k = 1; - for (uint32 i = n / 2; i > 0; i /= 2, k *= 2) { } - uint64 res = 0, x = 1; + const uint32 n = maxT; + /* k is a single bit (1u shifted by Clasp::log2(n)); presumably the largest power of two <= n, as the removed loop computed - confirm that Clasp::log2 rounds down. */ const uint32 k = (1u << Clasp::log2(n)); + /* s is id with bit k flipped. */ const uint32 s = k ^ id; + const bool ext = topo == Integration::topo_cubex; + uint64 res = 0; for (uint32 m = 1; m <= k; m *= 2) { uint32 i = m ^ id; - if (i < n) { res |= (x << i); } - else if (ext && k != m) { res |= (x << (i^k)); } + if (i < n) { res |= Distributor::mask(i); } + else if (ext && k != m) { res |= Distributor::mask(i^k); } } - if (ext) { - uint32 s = k ^ id; - for(uint32 m = 1; m < k && s >= n; m *= 2) { + /* cubex only, and only when s is not a valid id (>= n): also add every id < n that differs from s in exactly one bit below k. */ if (ext && s >= n) { + for(uint32 m = 1; m < k; m *= 2) { uint32 i = m ^ s; - if (i < n) { res |= (x << i); } + if (i < n) { res |= Distributor::mask(i); } } } - assert( (res & (x< #include #include +#if CLASP_HAS_THREADS +#include +#endif #include "catch.hpp" + + namespace Clasp { namespace Test { using namespace Clasp::mt; struct TestingConstraint : public Constraint { @@ -150,6 +155,7 @@ 
static void testDefaults(SharedContext& ctx) { ctx.setFrozen(0, true); REQUIRE(ctx.stats().vars.frozen == 0); } + TEST_CASE("Solver types", "[core]") { SECTION("test reason store") { if (sizeof(void*) == sizeof(uint32)) { @@ -2011,6 +2017,103 @@ TEST_CASE("Solver mt", "[core][mt]") { integrateGp(s2, sGp); REQUIRE(s2.isTrue(~a)); } + + /* Exercises Distributor::initSet/mask/inSet and ParallelSolveOptions::initPeerMask for thread counts up to maxT. */ SECTION("testPeerComputation") { + const uint32 maxT = 64; + /* nbits(t): number of iterations needed to clear t by XOR-ing off right_most_bit(t); this equals the count of set bits if right_most_bit isolates the lowest set bit (assumed - confirm). */ struct { + uint32 operator()(uint64 t) const { + for (uint32 i = 0;; t ^= right_most_bit(t), ++i) { if (!t) { return i; } } + } + } nbits; + /* Every id below nt must be a member of initSet(nt), and mask(id) must have exactly one bit set. */ SECTION("set") { + for (uint32 nt = 1; nt <= maxT; ++nt) { + uint64 set = Distributor::initSet(nt); + for (uint32 id = 0; id < nt; ++id) { + CAPTURE(nt, id); + REQUIRE(Distributor::inSet(set, id)); + REQUIRE(nbits(Distributor::mask(id)) == 1); + } + } + } + typedef ParallelSolveOptions::Integration::Topology Topology; + /* topo_all: the expected peers are all ids below nt except id itself. */ SECTION("all") { + Topology topo = Topology::topo_all; + for (uint32 nt = 1; nt <= maxT; ++nt) { + uint64 all = Distributor::initSet(nt); + for (uint32 id = 0; id < nt; ++id) { + CAPTURE(nt, id); + uint64 expected = all ^ (uint64(1) << id); + REQUIRE(ParallelSolveOptions::initPeerMask(id, topo, nt) == expected); + } + } + } + /* topo_ring: the expected peers are the previous and the next id, wrapping around at 0 and nt - 1. */ SECTION("ring") { + Topology topo = Topology::topo_ring; + for (uint32 nt = 1; nt <= maxT; ++nt) { + for (uint32 id = 0; id < nt; ++id) { + CAPTURE(nt, id); + uint32 prev = (id > 0 ? 
id : nt) - 1; + uint32 next = (id + 1) % nt; + uint64 expected = (uint64(1) << prev) | (uint64(1) << next); + CHECK(ParallelSolveOptions::initPeerMask(id, topo, nt) == expected); + } + } + } + SECTION("cube and cubex") { + Topology tCube = Topology::topo_cube; + Topology tCubeX = Topology::topo_cubex; + /* Expected peer masks for nt == 8 (dim == 3): the peers of each node are the ids that differ from it in exactly one bit. */ uint64 nodes[8] = { + /* 0: */ Distributor::mask(1) | Distributor::mask(2) | Distributor::mask(4), + /* 1: */ Distributor::mask(0) | Distributor::mask(3) | Distributor::mask(5), + /* 2: */ Distributor::mask(0) | Distributor::mask(3) | Distributor::mask(6), + /* 3: */ Distributor::mask(1) | Distributor::mask(2) | Distributor::mask(7), + /* 4: */ Distributor::mask(0) | Distributor::mask(5) | Distributor::mask(6), + /* 5: */ Distributor::mask(1) | Distributor::mask(4) | Distributor::mask(7), + /* 6: */ Distributor::mask(2) | Distributor::mask(4) | Distributor::mask(7), + /* 7: */ Distributor::mask(3) | Distributor::mask(5) | Distributor::mask(6), + }; + /* dim counts the powers of two seen so far, so dim == log2(nt) whenever nt is a power of two. */ for (uint32 nt = 2, dim = 0; nt <= maxT; ++nt) { + uint64 all = Distributor::initSet(nt); + bool powerOfTwo = (nt & (nt - 1)) == 0; + if (powerOfTwo) { ++dim; } + for (uint32 id = 0; id < nt; ++id) { + CAPTURE(nt, id, dim); + uint64 cube = ParallelSolveOptions::initPeerMask(id, tCube, nt); + uint64 cubeX = ParallelSolveOptions::initPeerMask(id, tCubeX, nt); + CHECK_FALSE(Distributor::inSet(cube, id)); + CHECK_FALSE(Distributor::inSet(cubeX, id)); + uint32 cubeBits = nbits(cube); + uint32 cubeXBits = nbits(cubeX); + CAPTURE(cubeBits, cubeXBits); + /* Power-of-two thread counts: cube and cubex must coincide and give each id exactly dim peers. */ if (powerOfTwo) { + CHECK(cube == cubeX); + CHECK(cubeBits == dim); + if (dim == 3) { + CHECK(cube == nodes[id]); + } + } + /* Otherwise: all peers must be ids below nt, and cubex must have at least as many peers as cube. */ else { + /* A cube peer count other than dim must match the peers in a cube of 1 << (dim+1) threads, restricted to ids below nt. */ if (cubeBits != dim) { + uint64 cubePow = ParallelSolveOptions::initPeerMask(id, tCube, 1 << (dim+1)); + cubePow &= all; + CHECK(cubePow == cube); + } + CHECK(cube < Distributor::mask(nt)); + CHECK(cubeX < Distributor::mask(nt)); + CHECK(cubeBits <= cubeXBits); + } + /* Symmetry: each cubex peer o of id must have id as a cubex peer, and o is a cube peer of id exactly when id is a cube peer of o. */ for (uint32 o = 0; o < nt; ++o) { + if (Distributor::inSet(cubeX, o)) { + uint64 
peerMask = ParallelSolveOptions::initPeerMask(o, tCubeX, nt); + CHECK(Distributor::inSet(peerMask, id)); + peerMask = ParallelSolveOptions::initPeerMask(o, tCube, nt); + CHECK(Distributor::inSet(cube, o) == Distributor::inSet(peerMask, id)); + } + } + } + } + } + } } #endif } }