diff --git a/.github/CHANGELOG.md b/.github/CHANGELOG.md
index 423c10879..a3d437563 100644
--- a/.github/CHANGELOG.md
+++ b/.github/CHANGELOG.md
@@ -9,6 +9,9 @@
 ### Improvements
 
+* Unify the excitation gates' memory layout to row-major for both LGPU and LT.
+  [(#959)](https://github.com/PennyLaneAI/pennylane-lightning/pull/959)
+
 * Update the `lightning.kokkos` CUDA backend for compatibility with Catalyst.
   [(#942)](https://github.com/PennyLaneAI/pennylane-lightning/pull/942)
diff --git a/pennylane_lightning/core/_version.py b/pennylane_lightning/core/_version.py
index e410f98f4..8432c4f82 100644
--- a/pennylane_lightning/core/_version.py
+++ b/pennylane_lightning/core/_version.py
@@ -16,4 +16,4 @@
 Version number (major.minor.patch[-label])
 """
-__version__ = "0.40.0-dev3"
+__version__ = "0.40.0-dev4"
diff --git a/pennylane_lightning/core/src/simulators/lightning_gpu/StateVectorCudaMPI.hpp b/pennylane_lightning/core/src/simulators/lightning_gpu/StateVectorCudaMPI.hpp
index f9c0f0116..6fbf9d8b8 100644
--- a/pennylane_lightning/core/src/simulators/lightning_gpu/StateVectorCudaMPI.hpp
+++ b/pennylane_lightning/core/src/simulators/lightning_gpu/StateVectorCudaMPI.hpp
@@ -399,39 +399,18 @@ class StateVectorCudaMPI final
             applyParametricPauliGate({opName}, ctrls, tgts, params.front(),
                                      adjoint);
         } else if (opName == "Rot" || opName == "CRot") {
-            if (adjoint) {
-                auto rot_matrix =
-                    cuGates::getRot<CFP_t>(params[2], params[1], params[0]);
-                applyDeviceMatrixGate(rot_matrix.data(), ctrls, tgts, true);
-            } else {
-                auto rot_matrix =
-                    cuGates::getRot<CFP_t>(params[0], params[1], params[2]);
-                applyDeviceMatrixGate(rot_matrix.data(), ctrls, tgts, false);
-            }
+            auto rot_matrix =
+                adjoint
+                    ? cuGates::getRot<CFP_t>(params[2], params[1], params[0])
+                    : cuGates::getRot<CFP_t>(params[0], params[1], params[2]);
+            applyDeviceMatrixGate(rot_matrix.data(), ctrls, tgts, adjoint);
         } else if (opName == "Matrix") {
-            DataBuffer<CFP_t, int> d_matrix{
-                gate_matrix.size(), BaseType::getDataBuffer().getDevTag(),
-                true};
-            d_matrix.CopyHostDataToGpu(gate_matrix.data(), d_matrix.getLength(),
-                                       false);
-            // ensure wire indexing correctly preserved for tensor-observables
-            const std::vector<std::size_t> ctrls_local{ctrls.rbegin(),
-                                                       ctrls.rend()};
-            const std::vector<std::size_t> tgts_local{tgts.rbegin(),
-                                                      tgts.rend()};
-            applyDeviceMatrixGate(d_matrix.getData(), ctrls_local, tgts_local,
-                                  adjoint);
+            applyDeviceMatrixGate(gate_matrix.data(), ctrls, tgts, adjoint);
         } else if (par_gates_.find(opName) != par_gates_.end()) {
             par_gates_.at(opName)(wires, adjoint, params);
         } else { // No offloadable function call; defer to matrix passing
             auto &&par = (params.empty()) ? std::vector{0.0} : params;
-            // ensure wire indexing correctly preserved for tensor-observables
-            const std::vector<std::size_t> ctrls_local{ctrls.rbegin(),
-                                                       ctrls.rend()};
-            const std::vector<std::size_t> tgts_local{tgts.rbegin(),
-                                                      tgts.rend()};
-
             if (!gate_cache_.gateExists(opName, par[0]) &&
                 gate_matrix.empty()) {
                 std::string message = "Currently unsupported gate: " + opName;
@@ -440,8 +419,8 @@ class StateVectorCudaMPI final
                 gate_cache_.add_gate(opName, par[0], gate_matrix);
             }
             applyDeviceMatrixGate(
-                gate_cache_.get_gate_device_ptr(opName, par[0]), ctrls_local,
-                tgts_local, adjoint);
+                gate_cache_.get_gate_device_ptr(opName, par[0]), ctrls, tgts,
+                adjoint);
         }
     }
@@ -1826,9 +1805,8 @@ class StateVectorCudaMPI final
      * @param tgts Target qubits.
      * @param use_adjoint Use adjoint of given gate.
      */
-    void applyCuSVDeviceMatrixGate(const CFP_t *matrix,
-                                   const std::vector<int> &ctrls,
-                                   const std::vector<int> &tgts,
+    void applyCuSVDeviceMatrixGate(const CFP_t *matrix, std::vector<int> &ctrls,
+                                   std::vector<int> &tgts,
                                    bool use_adjoint = false) {
         void *extraWorkspace = nullptr;
         std::size_t extraWorkspaceSizeInBytes = 0;
@@ -1846,6 +1824,9 @@ class StateVectorCudaMPI final
             compute_type = CUSTATEVEC_COMPUTE_32F;
         }
 
+        std::reverse(tgts.begin(), tgts.end());
+        std::reverse(ctrls.begin(), ctrls.end());
+
         // check the size of external workspace
         PL_CUSTATEVEC_IS_SUCCESS(custatevecApplyMatrixGetWorkspaceSize(
             /* custatevecHandle_t */ handle_.get(),
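Note on the two `applyCuSVDeviceMatrixGate` hunks above: the wire-order reversal that `applyOperation` used to perform through the `ctrls_local`/`tgts_local` copies now happens exactly once, at the point where the indices are handed to cuStateVec. cuStateVec numbers qubits with index 0 as the least-significant bit, the reverse of the wire order used higher up the stack, so a single flip next to the `custatevecApplyMatrix` call suffices. A minimal standalone sketch of the pattern (illustrative only, not the repository's actual helper):

```cpp
#include <algorithm>
#include <vector>

// Hypothetical stand-in for the low-level gate-application helper: the
// reversal lives next to the cuStateVec call instead of being repeated
// with reversed copies at every call site.
void applyMatrixSketch(std::vector<int> &ctrls, std::vector<int> &tgts) {
    std::reverse(tgts.begin(), tgts.end());
    std::reverse(ctrls.begin(), ctrls.end());
    // ... pass ctrls.data() / tgts.data() to custatevecApplyMatrix ...
}
```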
diff --git a/pennylane_lightning/core/src/simulators/lightning_gpu/StateVectorCudaManaged.hpp b/pennylane_lightning/core/src/simulators/lightning_gpu/StateVectorCudaManaged.hpp
index d66e32c2e..d354133be 100644
--- a/pennylane_lightning/core/src/simulators/lightning_gpu/StateVectorCudaManaged.hpp
+++ b/pennylane_lightning/core/src/simulators/lightning_gpu/StateVectorCudaManaged.hpp
@@ -314,29 +314,12 @@ class StateVectorCudaManaged
                 applyDeviceMatrixGate_(rot_matrix.data(), ctrls, tgts, false);
             }
         } else if (opName == "Matrix") {
-            DataBuffer<CFP_t, int> d_matrix{
-                gate_matrix.size(), BaseType::getDataBuffer().getDevTag(),
-                true};
-            d_matrix.CopyHostDataToGpu(gate_matrix.data(), d_matrix.getLength(),
-                                       false);
-            // ensure wire indexing correctly preserved for tensor-observables
-            const std::vector<std::size_t> ctrls_local{ctrls.rbegin(),
-                                                       ctrls.rend()};
-            const std::vector<std::size_t> tgts_local{tgts.rbegin(),
-                                                      tgts.rend()};
-            applyDeviceMatrixGate_(d_matrix.getData(), ctrls_local, tgts_local,
-                                   adjoint);
+            applyDeviceMatrixGate_(gate_matrix.data(), ctrls, tgts, adjoint);
         } else if (par_gates_.find(opName) != par_gates_.end()) {
             par_gates_.at(opName)(wires, adjoint, params);
         } else { // No offloadable function call; defer to matrix passing
             auto &&par = (params.empty()) ? std::vector{0.0} : params;
-            // ensure wire indexing correctly preserved for tensor-observables
-            const std::vector<std::size_t> ctrls_local{ctrls.rbegin(),
-                                                       ctrls.rend()};
-            const std::vector<std::size_t> tgts_local{tgts.rbegin(),
-                                                      tgts.rend()};
-
             if (!gate_cache_.gateExists(opName, par[0]) &&
                 gate_matrix.empty()) {
                 std::string message = "Currently unsupported gate: " + opName +
@@ -346,8 +329,8 @@ class StateVectorCudaManaged
                 gate_cache_.add_gate(opName, par[0], gate_matrix);
             }
             applyDeviceMatrixGate_(
-                gate_cache_.get_gate_device_ptr(opName, par[0]), ctrls_local,
-                tgts_local, adjoint);
+                gate_cache_.get_gate_device_ptr(opName, par[0]), ctrls, tgts,
+                adjoint);
         }
     }
@@ -432,9 +415,6 @@ class StateVectorCudaManaged
             gate_cache_.add_gate(opName, par[0], matrix_cu);
         }
-        std::reverse(ctrlsInt.begin(), ctrlsInt.end());
-        std::reverse(tgtsInt.begin(), tgtsInt.end());
-        std::reverse(ctrls_valuesInt.begin(), ctrls_valuesInt.end());
         applyDeviceGeneralGate_(
             gate_cache_.get_gate_device_ptr(opName, par[0]), ctrlsInt, tgtsInt,
             ctrls_valuesInt, adjoint);
@@ -474,10 +454,6 @@ class StateVectorCudaManaged
         auto ctrls_valuesInt =
             Pennylane::Util::cast_vector<bool, int>(controlled_values);
 
-        std::reverse(ctrlsInt.begin(), ctrlsInt.end());
-        std::reverse(tgtsInt.begin(), tgtsInt.end());
-        std::reverse(ctrls_valuesInt.begin(), ctrls_valuesInt.end());
-
         applyDeviceGeneralGate_(d_matrix.getData(), ctrlsInt, tgtsInt,
                                 ctrls_valuesInt, inverse);
     }
@@ -1620,10 +1596,9 @@ class StateVectorCudaManaged
      * @param ctrls_values Control values.
      * @param use_adjoint Use adjoint of given gate. Defaults to false.
      */
-    void applyDeviceGeneralGate_(const CFP_t *matrix,
-                                 const std::vector<int> &ctrls,
-                                 const std::vector<int> &tgts,
-                                 const std::vector<int> &ctrls_values,
+    void applyDeviceGeneralGate_(const CFP_t *matrix, std::vector<int> &ctrls,
+                                 std::vector<int> &tgts,
+                                 std::vector<int> &ctrls_values,
                                  bool use_adjoint = false) {
         void *extraWorkspace = nullptr;
         std::size_t extraWorkspaceSizeInBytes = 0;
@@ -1641,6 +1616,10 @@ class StateVectorCudaManaged
             compute_type = CUSTATEVEC_COMPUTE_32F;
         }
 
+        std::reverse(tgts.begin(), tgts.end());
+        std::reverse(ctrls.begin(), ctrls.end());
+        std::reverse(ctrls_values.begin(), ctrls_values.end());
+
         // check the size of external workspace
        PL_CUSTATEVEC_IS_SUCCESS(custatevecApplyMatrixGetWorkspaceSize(
             /* custatevecHandle_t */ handle_.get(),
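The same consolidation is applied to `StateVectorCudaManaged` above, with one signature change worth flagging: `ctrls`, `tgts`, and `ctrls_values` are now taken by non-const reference so `applyDeviceGeneralGate_` can reverse them in place instead of allocating reversed copies. The trade-off, sketched with hypothetical helper names:

```cpp
#include <algorithm>
#include <vector>

// Old pattern: each call site allocated a reversed copy.
std::vector<int> reversedCopy(const std::vector<int> &v) {
    return {v.rbegin(), v.rend()};
}

// New pattern: the helper reverses its argument in place. The non-const
// reference signals that callers must not rely on the original order
// after the call.
void reverseInPlace(std::vector<int> &v) {
    std::reverse(v.begin(), v.end());
}
```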
diff --git a/pennylane_lightning/core/src/simulators/lightning_gpu/gates/tests/Test_StateVectorCudaManaged_Generators.cpp b/pennylane_lightning/core/src/simulators/lightning_gpu/gates/tests/Test_StateVectorCudaManaged_Generators.cpp
index 1129e5a66..a5aba04eb 100644
--- a/pennylane_lightning/core/src/simulators/lightning_gpu/gates/tests/Test_StateVectorCudaManaged_Generators.cpp
+++ b/pennylane_lightning/core/src/simulators/lightning_gpu/gates/tests/Test_StateVectorCudaManaged_Generators.cpp
@@ -793,14 +793,10 @@ TEST_CASE("Generators::applyGeneratorControlledPhaseShift",
 }
 
 TEST_CASE("Generators::applyGeneratorSingleExcitation", "[GateGenerators]") {
-    std::vector<StateVectorCudaManaged<double>::CFP_t> matrix{
-        // clang-format off
-        {0.0, 0.0}, {0.0, 0.0}, {0.0, 0.0}, {0.0, 0.0},
-        {0.0, 0.0}, {0.0, 0.0}, {0.0, -1.0}, {0.0, 0.0},
-        {0.0, 0.0}, {0.0, 1.0}, {0.0, 0.0}, {0.0, 0.0},
-        {0.0, 0.0}, {0.0, 0.0}, {0.0, 0.0}, {0.0, 0.0}
-        // clang-format on
-    };
+    std::vector<StateVectorCudaManaged<double>::CFP_t> matrix(
+        16, {0.0, 0.0});
+    matrix[6] = {0.0, -1.0};
+    matrix[9] = {0.0, 1.0};
     std::mt19937 re{1337U};
 
     for (std::size_t num_qubits = 2; num_qubits <= 5; num_qubits++) {
@@ -875,14 +871,12 @@ TEST_CASE("Generators::applyGeneratorSingleExcitation", "[GateGenerators]") {
 
 TEST_CASE("Generators::applyGeneratorSingleExcitationMinus",
           "[GateGenerators]") {
-    std::vector<StateVectorCudaManaged<double>::CFP_t> matrix{
-        // clang-format off
-        {1.0, 0.0}, {0.0, 0.0}, {0.0, 0.0}, {0.0, 0.0},
-        {0.0, 0.0}, {0.0, 0.0}, {0.0,-1.0}, {0.0, 0.0},
-        {0.0, 0.0}, {0.0, 1.0}, {0.0, 0.0}, {0.0, 0.0},
-        {0.0, 0.0}, {0.0, 0.0}, {0.0, 0.0}, {1.0, 0.0}
-        // clang-format on
-    };
+    std::vector<StateVectorCudaManaged<double>::CFP_t> matrix(
+        16, {0.0, 0.0});
+    matrix[0] = {1.0, 0.0};
+    matrix[6] = {0.0, -1.0};
+    matrix[9] = {0.0, 1.0};
+    matrix[15] = {1.0, 0.0};
     std::mt19937 re{1337U};
 
     for (std::size_t num_qubits = 2; num_qubits <= 5; num_qubits++) {
@@ -957,14 +951,12 @@ TEST_CASE("Generators::applyGeneratorSingleExcitationMinus",
 
 TEST_CASE("Generators::applyGeneratorSingleExcitationPlus",
           "[GateGenerators]") {
-    std::vector<StateVectorCudaManaged<double>::CFP_t> matrix{
-        // clang-format off
-        {-1.0, 0.0},{0.0, 0.0}, {0.0, 0.0}, {0.0, 0.0},
-        {0.0, 0.0}, {0.0, 0.0}, {0.0,-1.0}, {0.0, 0.0},
-        {0.0, 0.0}, {0.0, 1.0}, {0.0, 0.0}, {0.0, 0.0},
-        {0.0, 0.0}, {0.0, 0.0}, {0.0, 0.0}, {-1.0, 0.0}
-        // clang-format on
-    };
+    std::vector<StateVectorCudaManaged<double>::CFP_t> matrix(
+        16, {0.0, 0.0});
+    matrix[0] = {-1.0, 0.0};
+    matrix[6] = {0.0, -1.0};
+    matrix[9] = {0.0, 1.0};
+    matrix[15] = {-1.0, 0.0};
     std::mt19937 re{1337U};
 
     for (std::size_t num_qubits = 2; num_qubits <= 5; num_qubits++) {
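The test refactors above (and the double-excitation ones below) replace literal matrix tables with a zero-filled vector plus a few indexed writes. The flat indices follow from row-major layout: element (row, col) of an n x n matrix sits at n*row + col. The double-excitation generator couples the basis states |0011> (row 3) and |1100> (row 12) of the 16x16 matrix, which is where 60 and 195 come from. A compile-time sketch of the arithmetic (standalone, with a hypothetical helper name):

```cpp
#include <cstddef>

// Row-major flat index of element (row, col) in an n x n matrix.
constexpr std::size_t flatIndex(std::size_t row, std::size_t col,
                                std::size_t n) {
    return n * row + col;
}

static_assert(flatIndex(1, 2, 4) == 6);       // 4x4 generators: the -i entry
static_assert(flatIndex(2, 1, 4) == 9);       // 4x4 generators: the +i entry
static_assert(flatIndex(3, 12, 16) == 60);    // 16x16 generators: the -i entry
static_assert(flatIndex(12, 3, 16) == 195);   // 16x16 generators: the +i entry
static_assert(flatIndex(5, 5, 16) == 5 * 17); // diagonal entries sit at i * 17
```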
@@ -1058,26 +1050,10 @@ TEST_CASE("Generators::applyGeneratorDoubleExcitation_GPU",
     */
     // clang-format on
 
-    std::vector<StateVectorCudaManaged<double>::CFP_t> matrix{
-        // clang-format off
-        {0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},
-        {0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},
-        {0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},
-        {0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, -1.0},{0, 0},{0, 0},{0, 0},
-        {0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},
-        {0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},
-        {0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},
-        {0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},
-        {0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},
-        {0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},
-        {0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},
-        {0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},
-        {0, 0},{0, 0},{0, 0},{0, 1.0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},
-        {0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},
-        {0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},
-        {0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0}
-        // clang-format on
-    };
+    std::vector<StateVectorCudaManaged<double>::CFP_t> matrix(
+        256, {0.0, 0.0});
+    matrix[60] = {0.0, -1.0};
+    matrix[195] = {0.0, 1.0};
     std::mt19937 re{1337U};
 
     for (std::size_t num_qubits = 4; num_qubits <= 8; num_qubits++) {
@@ -1167,26 +1143,16 @@
 
 TEST_CASE("Generators::applyGeneratorDoubleExcitationMinus_GPU",
           "[GateGenerators]") {
-    std::vector<StateVectorCudaManaged<double>::CFP_t> matrix{
-        // clang-format off
-        {1.0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},
-        {0, 0},{1.0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},
-        {0, 0},{0, 0},{1.0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},
-        {0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, -1.0},{0, 0},{0, 0},{0, 0},
-        {0, 0},{0, 0},{0, 0},{0, 0},{1.0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},
-        {0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{1.0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},
-        {0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{1.0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},
-        {0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{1.0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},
-        {0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{1.0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},
-        {0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{1.0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},
-        {0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{1.0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},
-        {0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{1.0, 0},{0, 0},{0, 0},{0, 0},{0, 0},
-        {0, 0},{0, 0},{0, 0},{0, 1.0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},
-        {0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{1.0, 0},{0, 0},{0, 0},
-        {0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{1.0, 0},{0, 0},
-        {0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{1.0, 0}
-        // clang-format on
-    };
+    std::vector<StateVectorCudaManaged<double>::CFP_t> matrix(
+        256, {0.0, 0.0});
+    matrix[60] = {0.0, -1.0};
+    matrix[195] = {0.0, 1.0};
+    for (std::size_t i = 0; i < 16; i++) {
+        if (i != 3 && i != 12) {
+            const std::size_t idx = i * 17;
+            matrix[idx] = {1.0, 0.0};
+        }
+    }
     std::mt19937 re{1337U};
 
     for (std::size_t num_qubits = 4; num_qubits <= 8; num_qubits++) {
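The Minus and Plus double-excitation generators share the two off-diagonal entries above and differ only in the diagonal they add outside rows 3 and 12: +1 for Minus, -1 for Plus (next hunk). A condensed sketch of the construction the two tests now share, with `sign` as a hypothetical stand-in parameter and plain `std::complex` instead of the repository's `CFP_t`:

```cpp
#include <complex>
#include <cstddef>
#include <vector>

// Build the 16x16 DoubleExcitationMinus/Plus generator used by the tests:
// -i/+i off-diagonal at (3, 12) and (12, 3), and `sign` (+1.0 for Minus,
// -1.0 for Plus) on the diagonal outside rows 3 and 12.
std::vector<std::complex<double>> doubleExcitationGenerator(double sign) {
    std::vector<std::complex<double>> mat(256, {0.0, 0.0});
    mat[60] = {0.0, -1.0};
    mat[195] = {0.0, 1.0};
    for (std::size_t i = 0; i < 16; i++) {
        if (i != 3 && i != 12) {
            mat[i * 17] = {sign, 0.0}; // diagonal entry (i, i)
        }
    }
    return mat;
}
```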
@@ -1276,26 +1242,16 @@
 
 TEST_CASE("Generators::applyGeneratorDoubleExcitationPlus_GPU",
           "[GateGenerators]") {
-    std::vector<StateVectorCudaManaged<double>::CFP_t> matrix{
-        // clang-format off
-        {-1.0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},
-        {0, 0},{-1.0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},
-        {0, 0},{0, 0},{-1.0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},
-        {0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, -1.0},{0, 0},{0, 0},{0, 0},
-        {0, 0},{0, 0},{0, 0},{0, 0},{-1.0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},
-        {0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{-1.0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},
-        {0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{-1.0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},
-        {0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{-1.0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},
-        {0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{-1.0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},
-        {0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{-1.0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},
-        {0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{-1.0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},
-        {0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{-1.0, 0},{0, 0},{0, 0},{0, 0},{0, 0},
-        {0, 0},{0, 0},{0, 0},{0, 1.0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},
-        {0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{-1.0, 0},{0, 0},{0, 0},
-        {0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{-1.0, 0},{0, 0},
-        {0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{0, 0},{-1.0, 0}
-        // clang-format on
-    };
+    std::vector<StateVectorCudaManaged<double>::CFP_t> matrix(
+        256, {0.0, 0.0});
+    matrix[60] = {0.0, -1.0};
+    matrix[195] = {0.0, 1.0};
+    for (std::size_t i = 0; i < 16; i++) {
+        if (i != 3 && i != 12) {
+            const std::size_t idx = i * 17;
+            matrix[idx] = {-1.0, 0.0};
+        }
+    }
     std::mt19937 re{1337U};
 
     for (std::size_t num_qubits = 4; num_qubits <= 8; num_qubits++) {
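The `cuGates_host.hpp` changes below remove the `_ENABLE_PLGPU` conditional that used to flip the sign of `s`. Because the excitation rotations are real, the column-major layout is simply the transpose, which only swaps the +sin/-sin off-diagonal pair; unifying on row-major lets one definition serve both backends, per the changelog entry. A standalone sketch using PennyLane's documented `SingleExcitation` matrix (plain `std::complex` rather than `CFP_t`):

```cpp
#include <cmath>
#include <complex>
#include <vector>

// Row-major SingleExcitation(angle): the rotation acts on the |01>/|10>
// subspace. Transposing to column-major would only swap the -s and +s
// entries, which is exactly what the removed #ifdef branch used to do.
std::vector<std::complex<double>> singleExcitationRowMajor(double angle) {
    const double c = std::cos(angle / 2);
    const double s = std::sin(angle / 2);
    return {1, 0, 0, 0,  //
            0, c, -s, 0, //
            0, s, c, 0,  //
            0, 0, 0, 1};
}
```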
diff --git a/pennylane_lightning/core/src/utils/cuda_utils/cuGates_host.hpp b/pennylane_lightning/core/src/utils/cuda_utils/cuGates_host.hpp
index 53b712ed5..4d09555d7 100644
--- a/pennylane_lightning/core/src/utils/cuda_utils/cuGates_host.hpp
+++ b/pennylane_lightning/core/src/utils/cuda_utils/cuGates_host.hpp
@@ -654,15 +654,8 @@ template <class CFP_t, class U = double>
 static auto getSingleExcitation(U angle) -> std::vector<CFP_t> {
     const U p2 = angle / 2;
     const CFP_t c{std::cos(p2), 0};
-    // TODO: To remove conditional compilation here in the future, current
-    // implementation will block the simultaneous installation of LGPU and
-    // cutensornet backends
-
-#ifdef _ENABLE_PLGPU
-    const CFP_t s{-std::sin(p2), 0}; // column-major
-#else
     const CFP_t s{std::sin(p2), 0}; // row-major
-#endif
+
     return {cuUtil::ONE<CFP_t>(),
             cuUtil::ZERO<CFP_t>(),
             cuUtil::ZERO<CFP_t>(),
@@ -708,17 +701,17 @@ static auto getSingleExcitation(const std::vector<U> &params)
 template <class CFP_t>
 static constexpr auto getGeneratorSingleExcitation() -> std::vector<CFP_t> {
     return {
-        cuUtil::ZERO<CFP_t>(), cuUtil::ZERO<CFP_t>(),
-        cuUtil::ZERO<CFP_t>(), cuUtil::ZERO<CFP_t>(),
+        cuUtil::ZERO<CFP_t>(),  cuUtil::ZERO<CFP_t>(),
+        cuUtil::ZERO<CFP_t>(),  cuUtil::ZERO<CFP_t>(),
 
-        cuUtil::ZERO<CFP_t>(), cuUtil::ZERO<CFP_t>(),
-        cuUtil::IMAG<CFP_t>(), cuUtil::ZERO<CFP_t>(),
+        cuUtil::ZERO<CFP_t>(),  cuUtil::ZERO<CFP_t>(),
+        -cuUtil::IMAG<CFP_t>(), cuUtil::ZERO<CFP_t>(),
 
-        cuUtil::ZERO<CFP_t>(), -cuUtil::IMAG<CFP_t>(),
-        cuUtil::ZERO<CFP_t>(), cuUtil::ZERO<CFP_t>(),
+        cuUtil::ZERO<CFP_t>(),  cuUtil::IMAG<CFP_t>(),
+        cuUtil::ZERO<CFP_t>(),  cuUtil::ZERO<CFP_t>(),
 
-        cuUtil::ZERO<CFP_t>(), cuUtil::ZERO<CFP_t>(),
-        cuUtil::ZERO<CFP_t>(), cuUtil::ZERO<CFP_t>(),
+        cuUtil::ZERO<CFP_t>(),  cuUtil::ZERO<CFP_t>(),
+        cuUtil::ZERO<CFP_t>(),  cuUtil::ZERO<CFP_t>(),
     };
 }
@@ -739,14 +732,7 @@ static auto getSingleExcitationMinus(U angle) -> std::vector<CFP_t> {
     const U p2 = angle / 2;
     const CFP_t e = cuUtil::complexToCu<std::complex<U>>(
         std::exp(std::complex<U>(0, -p2)));
     const CFP_t c{std::cos(p2), 0};
-// TODO: To remove conditional compilation here in the future, current
-// implementation will block the simultaneous installation of LGPU and
-// cutensornet backends
-#ifdef _ENABLE_PLGPU
-    const CFP_t s{-std::sin(p2), 0}; // column-major
-#else
     const CFP_t s{std::sin(p2), 0}; // row-major
-#endif
 
     return {e,
             cuUtil::ZERO<CFP_t>(),
@@ -795,17 +781,17 @@ template <class CFP_t>
 static constexpr auto getGeneratorSingleExcitationMinus()
     -> std::vector<CFP_t> {
     return {
-        cuUtil::ONE<CFP_t>(),  cuUtil::ZERO<CFP_t>(),
-        cuUtil::ZERO<CFP_t>(), cuUtil::ZERO<CFP_t>(),
+        cuUtil::ONE<CFP_t>(),   cuUtil::ZERO<CFP_t>(),
+        cuUtil::ZERO<CFP_t>(),  cuUtil::ZERO<CFP_t>(),
 
-        cuUtil::ZERO<CFP_t>(), cuUtil::ZERO<CFP_t>(),
-        cuUtil::IMAG<CFP_t>(), cuUtil::ZERO<CFP_t>(),
+        cuUtil::ZERO<CFP_t>(),  cuUtil::ZERO<CFP_t>(),
+        -cuUtil::IMAG<CFP_t>(), cuUtil::ZERO<CFP_t>(),
 
-        cuUtil::ZERO<CFP_t>(), -cuUtil::IMAG<CFP_t>(),
-        cuUtil::ZERO<CFP_t>(), cuUtil::ZERO<CFP_t>(),
+        cuUtil::ZERO<CFP_t>(),  cuUtil::IMAG<CFP_t>(),
+        cuUtil::ZERO<CFP_t>(),  cuUtil::ZERO<CFP_t>(),
 
-        cuUtil::ZERO<CFP_t>(), cuUtil::ZERO<CFP_t>(),
-        cuUtil::ZERO<CFP_t>(), cuUtil::ONE<CFP_t>(),
+        cuUtil::ZERO<CFP_t>(),  cuUtil::ZERO<CFP_t>(),
+        cuUtil::ZERO<CFP_t>(),  cuUtil::ONE<CFP_t>(),
     };
 }
@@ -826,15 +812,8 @@ static auto getSingleExcitationPlus(U angle) -> std::vector<CFP_t> {
     const U p2 = angle / 2;
     const CFP_t e = cuUtil::complexToCu<std::complex<U>>(
         std::exp(std::complex<U>(0, p2)));
     const CFP_t c{std::cos(p2), 0};
-    // TODO: To remove conditional compilation here in the future, current
-    // implementation will block the simultaneous installation of LGPU and
-    // cutensornet backends
-
-#ifdef _ENABLE_PLGPU
-    const CFP_t s{-std::sin(p2), 0}; // column-major
-#else
     const CFP_t s{std::sin(p2), 0}; // row-major
-#endif
+
     return {e,
             cuUtil::ZERO<CFP_t>(),
             cuUtil::ZERO<CFP_t>(),
@@ -881,17 +860,17 @@ static auto getSingleExcitationPlus(const std::vector<U> &params)
 template <class CFP_t>
 static constexpr auto getGeneratorSingleExcitationPlus() -> std::vector<CFP_t> {
     return {
-        -cuUtil::ONE<CFP_t>(), cuUtil::ZERO<CFP_t>(),
-        cuUtil::ZERO<CFP_t>(), cuUtil::ZERO<CFP_t>(),
+        -cuUtil::ONE<CFP_t>(),  cuUtil::ZERO<CFP_t>(),
+        cuUtil::ZERO<CFP_t>(),  cuUtil::ZERO<CFP_t>(),
 
-        cuUtil::ZERO<CFP_t>(), cuUtil::ZERO<CFP_t>(),
-        cuUtil::IMAG<CFP_t>(), cuUtil::ZERO<CFP_t>(),
+        cuUtil::ZERO<CFP_t>(),  cuUtil::ZERO<CFP_t>(),
+        -cuUtil::IMAG<CFP_t>(), cuUtil::ZERO<CFP_t>(),
 
-        cuUtil::ZERO<CFP_t>(), -cuUtil::IMAG<CFP_t>(),
-        cuUtil::ZERO<CFP_t>(), cuUtil::ZERO<CFP_t>(),
+        cuUtil::ZERO<CFP_t>(),  cuUtil::IMAG<CFP_t>(),
+        cuUtil::ZERO<CFP_t>(),  cuUtil::ZERO<CFP_t>(),
 
-        cuUtil::ZERO<CFP_t>(), cuUtil::ZERO<CFP_t>(),
-        cuUtil::ZERO<CFP_t>(), -cuUtil::ONE<CFP_t>(),
+        cuUtil::ZERO<CFP_t>(),  cuUtil::ZERO<CFP_t>(),
+        cuUtil::ZERO<CFP_t>(),  -cuUtil::ONE<CFP_t>(),
     };
 }
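In each 4x4 generator above, the sign swap (`-IMAG` moving from flat index 9 to index 6) switches the matrix to the row-major convention: -i now sits at (row 1, col 2) and +i at (row 2, col 1), matching the reworked tests; the hunks below make the same swap at indices 60 and 195 of the 16x16 generators. Either layout is Hermitian, which a quick standalone check (plain `std::complex`, not the repository's `CFP_t`) makes explicit:

```cpp
#include <cassert>
#include <complex>
#include <vector>

int main() {
    using C = std::complex<double>;
    std::vector<C> g(16, C{0.0, 0.0});
    g[6] = C{0.0, -1.0}; // (row 1, col 2) in row-major order
    g[9] = C{0.0, 1.0};  // (row 2, col 1)
    // A generator must be Hermitian: g[r*4 + c] == conj(g[c*4 + r]).
    for (int r = 0; r < 4; ++r)
        for (int c = 0; c < 4; ++c)
            assert(g[r * 4 + c] == std::conj(g[c * 4 + r]));
    return 0;
}
```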
@@ -909,15 +888,8 @@ template <class CFP_t, class U = double>
 static auto getDoubleExcitation(U angle) -> std::vector<CFP_t> {
     const U p2 = angle / 2;
     const CFP_t c{std::cos(p2), 0};
-    // TODO: To remove conditional compilation here in the future, current
-    // implementation will block the simultaneous installation of LGPU and
-    // cutensornet backends
-
-#ifdef _ENABLE_PLGPU
-    const CFP_t s{-std::sin(p2), 0}; // column-major
-#else
     const CFP_t s{std::sin(p2), 0}; // row-major
-#endif
+
     std::vector<CFP_t> mat(256, cuUtil::ZERO<CFP_t>());
     mat[0] = cuUtil::ONE<CFP_t>();
     mat[17] = cuUtil::ONE<CFP_t>();
@@ -967,8 +939,8 @@ static auto getDoubleExcitation(const std::vector<U> &params)
 template <class CFP_t>
 static constexpr auto getGeneratorDoubleExcitation() -> std::vector<CFP_t> {
     std::vector<CFP_t> mat(256, cuUtil::ZERO<CFP_t>());
-    mat[60] = cuUtil::IMAG<CFP_t>();
-    mat[195] = -cuUtil::IMAG<CFP_t>();
+    mat[60] = -cuUtil::IMAG<CFP_t>();
+    mat[195] = cuUtil::IMAG<CFP_t>();
     return mat;
 }
@@ -989,15 +961,8 @@ static auto getDoubleExcitationMinus(U angle) -> std::vector<CFP_t> {
     const CFP_t e = cuUtil::complexToCu<std::complex<U>>(
         std::exp(std::complex<U>(0, -p2)));
     const CFP_t c{std::cos(p2), 0};
-    // TODO: To remove conditional compilation here in the future, current
-    // implementation will block the simultaneous installation of LGPU and
-    // cutensornet backends
-
-#ifdef _ENABLE_PLGPU
-    const CFP_t s{-std::sin(p2), 0}; // column-major
-#else
     const CFP_t s{std::sin(p2), 0}; // row-major
-#endif
+
     std::vector<CFP_t> mat(256, cuUtil::ZERO<CFP_t>());
     mat[0] = e;
     mat[17] = e;
@@ -1052,7 +1017,7 @@ static constexpr auto getGeneratorDoubleExcitationMinus()
     mat[0] = cuUtil::ONE<CFP_t>();
     mat[17] = cuUtil::ONE<CFP_t>();
     mat[34] = cuUtil::ONE<CFP_t>();
-    mat[60] = cuUtil::IMAG<CFP_t>();
+    mat[60] = -cuUtil::IMAG<CFP_t>();
     mat[68] = cuUtil::ONE<CFP_t>();
     mat[85] = cuUtil::ONE<CFP_t>();
     mat[102] = cuUtil::ONE<CFP_t>();
@@ -1061,7 +1026,7 @@ static constexpr auto getGeneratorDoubleExcitationMinus()
     mat[153] = cuUtil::ONE<CFP_t>();
     mat[170] = cuUtil::ONE<CFP_t>();
     mat[187] = cuUtil::ONE<CFP_t>();
-    mat[195] = -cuUtil::IMAG<CFP_t>();
+    mat[195] = cuUtil::IMAG<CFP_t>();
     mat[221] = cuUtil::ONE<CFP_t>();
     mat[238] = cuUtil::ONE<CFP_t>();
     mat[255] = cuUtil::ONE<CFP_t>();
@@ -1085,14 +1050,8 @@ static auto getDoubleExcitationPlus(U angle) -> std::vector<CFP_t> {
     const CFP_t e = cuUtil::complexToCu<std::complex<U>>(
         std::exp(std::complex<U>(0, p2)));
     const CFP_t c{std::cos(p2), 0};
-    // TODO: To remove conditional compilation here in the future, current
-    // implementation will block the simultaneous installation of LGPU and
-    // cutensornet backends
-#ifdef _ENABLE_PLGPU
-    const CFP_t s{-std::sin(p2), 0}; // column-major
-#else
     const CFP_t s{std::sin(p2), 0}; // row-major
-#endif
+
     std::vector<CFP_t> mat(256, cuUtil::ZERO<CFP_t>());
     mat[0] = e;
     mat[17] = e;
@@ -1146,7 +1105,7 @@ static constexpr auto getGeneratorDoubleExcitationPlus() -> std::vector<CFP_t> {
     mat[0] = -cuUtil::ONE<CFP_t>();
     mat[17] = -cuUtil::ONE<CFP_t>();
     mat[34] = -cuUtil::ONE<CFP_t>();
-    mat[60] = cuUtil::IMAG<CFP_t>();
+    mat[60] = -cuUtil::IMAG<CFP_t>();
     mat[68] = -cuUtil::ONE<CFP_t>();
     mat[85] = -cuUtil::ONE<CFP_t>();
     mat[102] = -cuUtil::ONE<CFP_t>();
@@ -1155,7 +1114,7 @@ static constexpr auto getGeneratorDoubleExcitationPlus() -> std::vector<CFP_t> {
     mat[153] = -cuUtil::ONE<CFP_t>();
     mat[170] = -cuUtil::ONE<CFP_t>();
     mat[187] = -cuUtil::ONE<CFP_t>();
-    mat[195] = -cuUtil::IMAG<CFP_t>();
+    mat[195] = cuUtil::IMAG<CFP_t>();
     mat[221] = -cuUtil::ONE<CFP_t>();
     mat[238] = -cuUtil::ONE<CFP_t>();
     mat[255] = -cuUtil::ONE<CFP_t>();